
Import WAMR Fast JIT (#1343)

Import WAMR Fast JIT, a lightweight JIT engine with quick startup, small
footprint, relatively good performance (~40% to ~50% of LLVM JIT) and good
portability.

Supported platforms: Linux, macOS and Linux SGX.
Supported architecture: x86-64.
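
Fast JIT is enabled at build time through the new WAMR_BUILD_FAST_JIT CMake
option introduced by this commit (see build-scripts/config_common.cmake and
doc/build_wamr.md below). A minimal build sketch, assuming the usual
product-mini flow on Linux:

    cd product-mini/platforms/linux
    mkdir build && cd build
    cmake .. -DWAMR_BUILD_FAST_JIT=1
    make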
Wenyong Huang, 3 years ago
parent commit bf28030993
68 files changed, 22563 additions and 65 deletions
  1. .github/workflows/compilation_on_android_ubuntu.yml (+11 -4)
  2. .gitignore (+2 -0)
  3. ATTRIBUTIONS.md (+10 -0)
  4. README.md (+1 -1)
  5. build-scripts/config_common.cmake (+2 -0)
  6. build-scripts/runtime_lib.cmake (+10 -1)
  7. core/config.h (+12 -0)
  8. core/iwasm/common/wasm_exec_env.c (+1 -0)
  9. core/iwasm/common/wasm_exec_env.h (+11 -0)
  10. core/iwasm/common/wasm_runtime_common.c (+27 -0)
  11. core/iwasm/fast-jit/asmjit_sgx_patch.diff (+42 -0)
  12. core/iwasm/fast-jit/cg/LICENSE_ASMJIT (+17 -0)
  13. core/iwasm/fast-jit/cg/LICENSE_ZYDIS (+23 -0)
  14. core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp (+7027 -0)
  15. core/iwasm/fast-jit/fe/jit_emit_compare.c (+345 -0)
  16. core/iwasm/fast-jit/fe/jit_emit_compare.h (+32 -0)
  17. core/iwasm/fast-jit/fe/jit_emit_const.c (+47 -0)
  18. core/iwasm/fast-jit/fe/jit_emit_const.h (+31 -0)
  19. core/iwasm/fast-jit/fe/jit_emit_control.c (+1209 -0)
  20. core/iwasm/fast-jit/fe/jit_emit_control.h (+56 -0)
  21. core/iwasm/fast-jit/fe/jit_emit_conversion.c (+660 -0)
  22. core/iwasm/fast-jit/fe/jit_emit_conversion.h (+73 -0)
  23. core/iwasm/fast-jit/fe/jit_emit_exception.c (+78 -0)
  24. core/iwasm/fast-jit/fe/jit_emit_exception.h (+23 -0)
  25. core/iwasm/fast-jit/fe/jit_emit_function.c (+535 -0)
  26. core/iwasm/fast-jit/fe/jit_emit_function.h (+39 -0)
  27. core/iwasm/fast-jit/fe/jit_emit_memory.c (+782 -0)
  28. core/iwasm/fast-jit/fe/jit_emit_memory.h (+89 -0)
  29. core/iwasm/fast-jit/fe/jit_emit_numberic.c (+1651 -0)
  30. core/iwasm/fast-jit/fe/jit_emit_numberic.h (+76 -0)
  31. core/iwasm/fast-jit/fe/jit_emit_parametric.c (+130 -0)
  32. core/iwasm/fast-jit/fe/jit_emit_parametric.h (+25 -0)
  33. core/iwasm/fast-jit/fe/jit_emit_table.c (+318 -0)
  34. core/iwasm/fast-jit/fe/jit_emit_table.h (+47 -0)
  35. core/iwasm/fast-jit/fe/jit_emit_variable.c (+323 -0)
  36. core/iwasm/fast-jit/fe/jit_emit_variable.h (+35 -0)
  37. core/iwasm/fast-jit/iwasm_fast_jit.cmake (+95 -0)
  38. core/iwasm/fast-jit/jit_codecache.c (+65 -0)
  39. core/iwasm/fast-jit/jit_codecache.h (+31 -0)
  40. core/iwasm/fast-jit/jit_codegen.c (+22 -0)
  41. core/iwasm/fast-jit/jit_codegen.h (+84 -0)
  42. core/iwasm/fast-jit/jit_compiler.c (+176 -0)
  43. core/iwasm/fast-jit/jit_compiler.h (+143 -0)
  44. core/iwasm/fast-jit/jit_dump.c (+331 -0)
  45. core/iwasm/fast-jit/jit_dump.h (+54 -0)
  46. core/iwasm/fast-jit/jit_frontend.c (+2248 -0)
  47. core/iwasm/fast-jit/jit_frontend.h (+521 -0)
  48. core/iwasm/fast-jit/jit_ir.c (+1403 -0)
  49. core/iwasm/fast-jit/jit_ir.def (+302 -0)
  50. core/iwasm/fast-jit/jit_ir.h (+1874 -0)
  51. core/iwasm/fast-jit/jit_regalloc.c (+840 -0)
  52. core/iwasm/fast-jit/jit_utils.c (+19 -0)
  53. core/iwasm/fast-jit/jit_utils.h (+136 -0)
  54. core/iwasm/include/wasm_export.h (+3 -0)
  55. core/iwasm/interpreter/wasm.h (+21 -1)
  56. core/iwasm/interpreter/wasm_interp.h (+11 -6)
  57. core/iwasm/interpreter/wasm_interp_classic.c (+66 -0)
  58. core/iwasm/interpreter/wasm_loader.c (+55 -4)
  59. core/iwasm/interpreter/wasm_mini_loader.c (+37 -3)
  60. core/iwasm/interpreter/wasm_runtime.c (+82 -12)
  61. core/iwasm/interpreter/wasm_runtime.h (+32 -6)
  62. doc/build_wamr.md (+39 -20)
  63. doc/linux_sgx.md (+8 -2)
  64. product-mini/platforms/darwin/CMakeLists.txt (+5 -0)
  65. product-mini/platforms/linux-sgx/CMakeLists.txt (+5 -0)
  66. product-mini/platforms/linux/CMakeLists.txt (+14 -0)
  67. product-mini/platforms/posix/main.c (+18 -0)
  68. tests/wamr-test-suites/test_wamr.sh (+23 -5)

+ 11 - 4
.github/workflows/compilation_on_android_ubuntu.yml

@@ -454,7 +454,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        test_option: [$DEFAULT_TEST_OPTIONS, $SIMD_TEST_OPTIONS]
+        test_option: [$DEFAULT_TEST_OPTIONS]
     steps:
       - name: checkout
         uses: actions/checkout@v3
@@ -488,7 +488,13 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        test_option: [$MULTI_MODULES_TEST_OPTIONS, $THREADS_TEST_OPTIONS]
+        running_mode: ["classic-interp", "fast-interp", "jit", "aot"]
+        test_option:
+          [
+            $MULTI_MODULES_TEST_OPTIONS,
+            $SIMD_TEST_OPTIONS,
+            $THREADS_TEST_OPTIONS,
+          ]
     steps:
       - name: checkout
         uses: actions/checkout@v3
@@ -513,7 +519,7 @@ jobs:
         run: sudo apt install -y ninja-build
 
       - name: run spec tests
-        run: ./test_wamr.sh ${{ matrix.test_option }}
+        run: ./test_wamr.sh ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites
 
   spec_test_x86_32:
@@ -522,6 +528,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
+        running_mode: ["classic-interp", "fast-interp", "jit", "aot"]
         test_option: [$DEFAULT_TEST_OPTIONS, $THREADS_TEST_OPTIONS]
     steps:
       - name: checkout
@@ -553,5 +560,5 @@ jobs:
           sudo apt install -y g++-multilib lib32gcc-9-dev ninja-build
 
       - name: run spec tests
-        run: ./test_wamr.sh ${{ env.X86_32_TARGET_TEST_OPTIONS }} ${{ matrix.test_option }}
+        run: ./test_wamr.sh ${{ env.X86_32_TARGET_TEST_OPTIONS }} ${{ matrix.test_option }} -t ${{ matrix.running_mode }}
         working-directory: ./tests/wamr-test-suites
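
Each matrix cell expands into one invocation of the test script; for example,
the jit cell of the multi-module job runs something like the following (the
*_TEST_OPTIONS variables are defined elsewhere in the workflow):

    cd tests/wamr-test-suites
    ./test_wamr.sh $MULTI_MODULES_TEST_OPTIONS -t jit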

+ 2 - 0
.gitignore

@@ -26,3 +26,5 @@ tests/wamr-test-suites/workspace
 !/test-tools/wamr-ide/VSCode-Extension/.vscode
 
 samples/socket-api/wasm-src/inc/pthread.h
+
+**/__pycache__

+ 10 - 0
ATTRIBUTIONS.md

@@ -13,6 +13,8 @@ WAMR project reused some components from other open source projects:
 - **WebAssembly debugging patch for LLDB**: for extending the ability of LLDB to support wasm debugging
 - **libuv**: for the WASI Libc with uvwasi implementation
 - **uvwasi**: for the WASI Libc with uvwasi implementation
+- **asmjit**: for the Fast JIT x86-64 codegen implementation
+- **zydis**: for the Fast JIT x86-64 codegen implementation
 
 The WAMR fast interpreter is a clean room development. We would acknowledge the inspirations by [WASM3](https://github.com/wasm3/wasm3) open source project for the approach of pre-calculated operand stack location.
 
@@ -29,6 +31,8 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the
 | WebAssembly debugging patch for LLDB | unspecified | unspecified | https://reviews.llvm.org/D78801 | |
 | libuv | v1.42.0 | v1.44.1 | https://github.com/libuv/libuv | https://www.cvedetails.com/vendor/15402/Libuv-Project.html |
 | uvwasi | unspecified | v0.0.12 | https://github.com/nodejs/uvwasi | |
+| asmjit | unspecified | unspecified | https://github.com/asmjit/asmjit | |
+| zydis | unspecified | e14a07895136182a5b53e181eec3b1c6e0b434de | https://github.com/zyantific/zydis | |
 
 ## Licenses
 
@@ -79,3 +83,9 @@ The WAMR fast interpreter is a clean room development. We would acknowledge the
 
 ### uvwasi
 [LICENSE](./core/iwasm/libraries/libc-uvwasi/LICENSE_UVWASI)
+
+### asmjit
+[LICENSE](./core/iwasm/fast-jit/cg/LICENSE_ASMJIT)
+
+### zydis
+[LICENSE](./core/iwasm/fast-jit/cg/LICENSE_ZYDIS)

+ 1 - 1
README.md

@@ -7,7 +7,7 @@ WebAssembly Micro Runtime
 [BA]: https://bytecodealliance.org/
 
 WebAssembly Micro Runtime (WAMR) is a lightweight standalone WebAssembly (WASM) runtime with small footprint, high performance and highly configurable features for applications cross from embedded, IoT, edge to Trusted Execution Environment (TEE), smart contract, cloud native and so on. It includes a few parts as below:
-- The [**"iwasm" VM core**](./README.md#iwasm-vm-core) to run WASM applications, supporting interpreter mode, AOT mode (Ahead-of-Time compilation) and JIT mode (Just-in-Time compilation)
+- The [**"iwasm" VM core**](./README.md#iwasm-vm-core) to run WASM applications, supporting interpreter mode, AOT mode (Ahead-of-Time compilation) and JIT modes (Just-in-Time compilation, LLVM JIT and Fast JIT are supported)
 
 - The [**"wamrc" AOT compiler**](./README.md#build-wamrc-aot-compiler) to compile WASM file into AOT file for best performance and smaller runtime footprint, which is run by "iwasm" VM Core
 

+ 2 - 0
build-scripts/config_common.cmake

@@ -138,6 +138,8 @@ if (WAMR_BUILD_JIT EQUAL 1)
   else ()
     message ("     WAMR LLVM MC JIT enabled")
   endif ()
+elseif (WAMR_BUILD_FAST_JIT EQUAL 1)
+  message ("     WAMR Fast JIT enabled")
 else ()
   message ("     WAMR JIT disabled")
 endif ()

+ 10 - 1
build-scripts/runtime_lib.cmake

@@ -50,7 +50,11 @@ if (NOT DEFINED WAMR_BUILD_TARGET)
 endif ()
 
 ################ optional according to settings ################
-if (WAMR_BUILD_INTERP EQUAL 1 OR WAMR_BUILD_JIT EQUAL 1)
+if (WAMR_BUILD_INTERP EQUAL 1 OR WAMR_BUILD_JIT EQUAL 1
+    OR WAMR_BUILD_FAST_JIT EQUAL 1)
+    if (WAMR_BUILD_FAST_JIT EQUAL 1)
+        set (WAMR_BUILD_FAST_INTERP 0)
+    endif ()
     include (${IWASM_DIR}/interpreter/iwasm_interp.cmake)
 endif ()
 
@@ -61,6 +65,10 @@ if (WAMR_BUILD_AOT EQUAL 1)
     endif ()
 endif ()
 
+if (NOT WAMR_BUILD_JIT EQUAL 1 AND WAMR_BUILD_FAST_JIT EQUAL 1)
+    include (${IWASM_DIR}/fast-jit/iwasm_fast_jit.cmake)
+endif ()
+
 if (WAMR_BUILD_APP_FRAMEWORK EQUAL 1)
     include (${APP_FRAMEWORK_DIR}/app_framework.cmake)
     include (${SHARED_DIR}/coap/lib_coap.cmake)
@@ -139,6 +147,7 @@ set (source_all
     ${IWASM_INTERP_SOURCE}
     ${IWASM_AOT_SOURCE}
     ${IWASM_COMPL_SOURCE}
+    ${IWASM_FAST_JIT_SOURCE}
     ${WASM_APP_LIB_SOURCE_ALL}
     ${NATIVE_INTERFACE_SOURCE}
     ${APP_MGR_SOURCE}

+ 12 - 0
core/config.h

@@ -94,6 +94,18 @@
 #define WASM_ENABLE_LAZY_JIT 0
 #endif
 
+#ifndef WASM_ENABLE_FAST_JIT
+#define WASM_ENABLE_FAST_JIT 0
+#endif
+
+#ifndef WASM_ENABLE_FAST_JIT_DUMP
+#define WASM_ENABLE_FAST_JIT_DUMP 0
+#endif
+
+#ifndef FAST_JIT_DEFAULT_CODE_CACHE_SIZE
+#define FAST_JIT_DEFAULT_CODE_CACHE_SIZE 10 * 1024 * 1024
+#endif
+
 #ifndef WASM_ENABLE_WAMR_COMPILER
 #define WASM_ENABLE_WAMR_COMPILER 0
 #endif
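
One caveat, noted here rather than in the diff itself: the macro body
10 * 1024 * 1024 is unparenthesized, so a use site that puts it next to an
operator of equal precedence changes meaning. A hypothetical illustration
(these lines are not from this commit):

    uint32 half = FAST_JIT_DEFAULT_CODE_CACHE_SIZE / 2;   /* OK: left-to-right */
    uint32 rem  = x % FAST_JIT_DEFAULT_CODE_CACHE_SIZE;   /* wrong: (x % 10) * 1024 * 1024 */
    uint32 ok   = x % (FAST_JIT_DEFAULT_CODE_CACHE_SIZE); /* parenthesize at the use site */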

+ 1 - 0
core/iwasm/common/wasm_exec_env.c

@@ -73,6 +73,7 @@ wasm_exec_env_create_internal(struct WASMModuleInstanceCommon *module_inst,
 #if WASM_ENABLE_MEMORY_TRACING != 0
     wasm_runtime_dump_exec_env_mem_consumption(exec_env);
 #endif
+
     return exec_env;
 
 #if WASM_ENABLE_THREAD_MGR != 0

+ 11 - 0
core/iwasm/common/wasm_exec_env.h

@@ -84,6 +84,17 @@ typedef struct WASMExecEnv {
     void **native_symbol;
 #endif
 
+#if WASM_ENABLE_FAST_JIT != 0
+    /**
+     * Cache for
+     * - JIT native operations on 32-bit targets that lack 64-bit
+     *   int/float registers, mainly for operations on double and int64,
+     *   such as F64TOI64, F32TOI64, I64 MUL/REM, and so on.
+     * - SSE instructions.
+     **/
+    uint64 jit_cache[2];
+#endif
+
 #if WASM_ENABLE_THREAD_MGR != 0
     /* thread return value */
     void *thread_ret_value;
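
As a hypothetical illustration of the comment above (this helper is not part
of the commit): on a 32-bit target without native 64-bit registers, a JIT
helper can spill an int64 result through the cache and let the jitted code
reload it in two 32-bit halves:

    /* Hypothetical helper, for illustration only */
    static void
    spill_i64_result(WASMExecEnv *exec_env, uint64 result)
    {
        exec_env->jit_cache[0] = result;
    }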

+ 27 - 0
core/iwasm/common/wasm_runtime_common.c

@@ -27,6 +27,9 @@
 #if WASM_ENABLE_SHARED_MEMORY != 0
 #include "wasm_shared_memory.h"
 #endif
+#if WASM_ENABLE_FAST_JIT != 0
+#include "../fast-jit/jit_compiler.h"
+#endif
 #include "../common/wasm_c_api_internal.h"
 
 /**
@@ -117,6 +120,10 @@ runtime_malloc(uint64 size, WASMModuleInstanceCommon *module_inst,
     return mem;
 }
 
+#if WASM_ENABLE_FAST_JIT != 0
+static JitCompOptions jit_options = { 0 };
+#endif
+
 #ifdef OS_ENABLE_HW_BOUND_CHECK
 /* The exec_env of thread local storage, set before calling function
    and used in signal handler, as we cannot get it from the argument
@@ -259,8 +266,20 @@ wasm_runtime_env_init()
     }
 #endif
 
+#if WASM_ENABLE_FAST_JIT != 0
+    if (!jit_compiler_init(&jit_options)) {
+        goto fail9;
+    }
+#endif
+
     return true;
 
+#if WASM_ENABLE_FAST_JIT != 0
+fail9:
+#if WASM_ENABLE_REF_TYPES != 0
+    wasm_externref_map_destroy();
+#endif
+#endif
 #if WASM_ENABLE_REF_TYPES != 0
 fail8:
 #endif
@@ -321,6 +340,10 @@ wasm_runtime_init()
 void
 wasm_runtime_destroy()
 {
+#if WASM_ENABLE_FAST_JIT != 0
+    jit_compiler_destroy();
+#endif
+
 #if WASM_ENABLE_REF_TYPES != 0
     wasm_externref_map_destroy();
 #endif
@@ -368,6 +391,10 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args)
                                   &init_args->mem_alloc_option))
         return false;
 
+#if WASM_ENABLE_FAST_JIT != 0
+    jit_options.code_cache_size = init_args->fast_jit_code_cache_size;
+#endif
+
     if (!wasm_runtime_env_init()) {
         wasm_runtime_memory_destroy();
         return false;
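
The code cache size flows from the embedder through RuntimeInitArgs (the new
fast_jit_code_cache_size field added to wasm_export.h in this commit) into
jit_options above. A minimal init sketch, assuming the standard
wasm_runtime_full_init flow; the 16 MB figure is just an example:

    #include <string.h>
    #include "wasm_export.h"

    static bool
    init_runtime_with_fast_jit(void)
    {
        RuntimeInitArgs init_args;
        memset(&init_args, 0, sizeof(RuntimeInitArgs));
        init_args.mem_alloc_type = Alloc_With_System_Allocator;
        /* Sized by the embedder; consumed by jit_compiler_init() */
        init_args.fast_jit_code_cache_size = 16 * 1024 * 1024;
        return wasm_runtime_full_init(&init_args);
    }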

+ 42 - 0
core/iwasm/fast-jit/asmjit_sgx_patch.diff

@@ -0,0 +1,42 @@
+diff --git a/src/asmjit/core/cpuinfo.cpp b/src/asmjit/core/cpuinfo.cpp
+index 7bf7407..ae2160b 100644
+--- a/src/asmjit/core/cpuinfo.cpp
++++ b/src/asmjit/core/cpuinfo.cpp
+@@ -9,13 +9,13 @@
+ 
+ #if !defined(_WIN32)
+   #include <errno.h>
+-  #include <sys/utsname.h>
++  //#include <sys/utsname.h>
+   #include <unistd.h>
+ #endif
+ 
+ // Required by `getauxval()` on Linux.
+ #if defined(__linux__)
+-  #include <sys/auxv.h>
++  //#include <sys/auxv.h>
+ #endif
+ 
+ //! Required to detect CPU and features on Apple platforms.
+diff --git a/src/asmjit/core/globals.cpp b/src/asmjit/core/globals.cpp
+index 2bbd0c0..e6b69e5 100644
+--- a/src/asmjit/core/globals.cpp
++++ b/src/asmjit/core/globals.cpp
+@@ -105,6 +105,8 @@ ASMJIT_FAVOR_SIZE const char* DebugUtils::errorAsString(Error err) noexcept {
+ #endif
+ }
+ 
++extern "C" int os_printf(const char *message, ...);
++
+ // DebugUtils - Debug Output
+ // =========================
+ 
+@@ -112,7 +114,7 @@ ASMJIT_FAVOR_SIZE void DebugUtils::debugOutput(const char* str) noexcept {
+ #if defined(_WIN32)
+   ::OutputDebugStringA(str);
+ #else
+-  ::fputs(str, stderr);
++  os_printf(str);
+ #endif
+ }
+ 

+ 17 - 0
core/iwasm/fast-jit/cg/LICENSE_ASMJIT

@@ -0,0 +1,17 @@
+Copyright (c) 2008-2020 The AsmJit Authors
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.

+ 23 - 0
core/iwasm/fast-jit/cg/LICENSE_ZYDIS

@@ -0,0 +1,23 @@
+The MIT License (MIT)
+
+Copyright (c) 2014-2021 Florian Bernd
+Copyright (c) 2014-2021 Joel Höner
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+

+ 7027 - 0
core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp

@@ -0,0 +1,7027 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_codegen.h"
+#include "jit_codecache.h"
+#include "jit_compiler.h"
+#include "jit_dump.h"
+
+#include <asmjit/core.h>
+#include <asmjit/x86.h>
+#if WASM_ENABLE_FAST_JIT_DUMP != 0
+#include <Zydis/Zydis.h>
+#endif
+
+#define CODEGEN_CHECK_ARGS 1
+#define CODEGEN_DUMP 0
+
+using namespace asmjit;
+
+static char *code_block_switch_to_jitted_from_interp = NULL;
+static char *code_block_return_to_interp_from_jitted = NULL;
+
+typedef enum {
+    REG_EBP_IDX = 0,
+    REG_EAX_IDX,
+    REG_EBX_IDX,
+    REG_ECX_IDX,
+    REG_EDX_IDX,
+    REG_EDI_IDX,
+    REG_ESI_IDX,
+    REG_I32_FREE_IDX = REG_ESI_IDX
+} RegIndexI32;
+
+typedef enum {
+    REG_RBP_IDX = 0,
+    REG_RAX_IDX,
+    REG_RBX_IDX,
+    REG_RCX_IDX,
+    REG_RDX_IDX,
+    REG_RDI_IDX,
+    REG_RSI_IDX,
+    REG_RSP_IDX,
+    REG_R8_IDX,
+    REG_R9_IDX,
+    REG_R10_IDX,
+    REG_R11_IDX,
+    REG_R12_IDX,
+    REG_R13_IDX,
+    REG_R14_IDX,
+    REG_R15_IDX,
+    REG_I64_FREE_IDX = REG_RSI_IDX
+} RegIndexI64;
+
+/* clang-format off */
+x86::Gp regs_i8[] = {
+    x86::bpl,  x86::al, x86::bl, x86::cl,
+    x86::dl,   x86::dil,  x86::sil,  x86::spl,
+    x86::r8b,  x86::r9b,  x86::r10b, x86::r11b,
+    x86::r12b, x86::r13b, x86::r14b, x86::r15b
+};
+
+x86::Gp regs_i16[] = {
+    x86::bp,   x86::ax,   x86::bx,   x86::cx,
+    x86::dx,   x86::di,   x86::si,   x86::sp,
+    x86::r8w,  x86::r9w,  x86::r10w, x86::r11w,
+    x86::r12w, x86::r13w, x86::r14w, x86::r15w
+};
+
+x86::Gp regs_i32[] = {
+    x86::ebp,  x86::eax,  x86::ebx,  x86::ecx,
+    x86::edx,  x86::edi,  x86::esi,  x86::esp,
+    x86::r8d,  x86::r9d,  x86::r10d, x86::r11d,
+    x86::r12d, x86::r13d, x86::r14d, x86::r15d
+};
+
+x86::Gp regs_i64[] = {
+    x86::rbp, x86::rax, x86::rbx, x86::rcx,
+    x86::rdx, x86::rdi, x86::rsi, x86::rsp,
+    x86::r8,  x86::r9,  x86::r10, x86::r11,
+    x86::r12, x86::r13, x86::r14, x86::r15,
+};
+
+#define REG_F32_FREE_IDX 15
+#define REG_F64_FREE_IDX 15
+
+x86::Xmm regs_float[] = {
+    x86::xmm0,
+    x86::xmm1,
+    x86::xmm2,
+    x86::xmm3,
+    x86::xmm4,
+    x86::xmm5,
+    x86::xmm6,
+    x86::xmm7,
+    x86::xmm8,
+    x86::xmm9,
+    x86::xmm10,
+    x86::xmm11,
+    x86::xmm12,
+    x86::xmm13,
+    x86::xmm14,
+    x86::xmm15,
+};
+/* clang-format on */
+
+int
+jit_codegen_interp_jitted_glue(void *exec_env, JitInterpSwitchInfo *info,
+                               void *target)
+{
+    typedef int32 (*F)(const void *exec_env, void *info, const void *target);
+    union {
+        F f;
+        void *v;
+    } u;
+
+    u.v = code_block_switch_to_jitted_from_interp;
+    return u.f(exec_env, info, target);
+}
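+
+/* The union above is the conventional way to convert a data pointer into a
+   function pointer: a direct cast between the two is not sanctioned by
+   ISO C/C++ and draws -Wcast-function-type warnings, while round-tripping
+   through a union sidesteps that. */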
+
+#define PRINT_LINE() LOG_VERBOSE("<Line:%d>\n", __LINE__)
+
+#if CODEGEN_DUMP != 0
+#define GOTO_FAIL     \
+    do {              \
+        PRINT_LINE(); \
+        goto fail;    \
+    } while (0)
+#else
+#define GOTO_FAIL goto fail
+#endif
+
+#if CODEGEN_CHECK_ARGS == 0
+
+#define CHECK_EQKIND(reg0, reg1) (void)0
+#define CHECK_CONST(reg0) (void)0
+#define CHECK_NCONST(reg0) (void)0
+#define CHECK_KIND(reg0, type) (void)0
+#define CHECK_REG_NO(no, kind) (void)0
+#else
+
+/* Check if two registers' kinds are equal */
+#define CHECK_EQKIND(reg0, reg1)                        \
+    do {                                                \
+        if (jit_reg_kind(reg0) != jit_reg_kind(reg1)) { \
+            PRINT_LINE();                               \
+            LOG_VERBOSE("reg type not equal:\n");       \
+            jit_dump_reg(cc, reg0);                     \
+            jit_dump_reg(cc, reg1);                     \
+            GOTO_FAIL;                                  \
+        }                                               \
+    } while (0)
+
+/* Check if a register is a const */
+#define CHECK_CONST(reg0)                       \
+    do {                                        \
+        if (!jit_reg_is_const(reg0)) {          \
+            PRINT_LINE();                       \
+            LOG_VERBOSE("reg is not const:\n"); \
+            jit_dump_reg(cc, reg0);             \
+            GOTO_FAIL;                          \
+        }                                       \
+    } while (0)
+
+/* Check if a register is not a const */
+#define CHECK_NCONST(reg0)                  \
+    do {                                    \
+        if (jit_reg_is_const(reg0)) {       \
+            PRINT_LINE();                   \
+            LOG_VERBOSE("reg is const:\n"); \
+            jit_dump_reg(cc, reg0);         \
+            GOTO_FAIL;                      \
+        }                                   \
+    } while (0)
+
+/* Check if a register is of the expected kind */
+#define CHECK_KIND(reg0, type)                                  \
+    do {                                                        \
+        if (jit_reg_kind(reg0) != type) {                       \
+            PRINT_LINE();                                       \
+            LOG_VERBOSE("invalid reg type %d, expected is: %d", \
+                        jit_reg_kind(reg0), type);              \
+            jit_dump_reg(cc, reg0);                             \
+            GOTO_FAIL;                                          \
+        }                                                       \
+    } while (0)
+
+#define CHECK_I32_REG_NO(no)                                      \
+    do {                                                          \
+        if ((uint32)no >= sizeof(regs_i32) / sizeof(regs_i32[0])) \
+            GOTO_FAIL;                                            \
+    } while (0)
+
+#define CHECK_I64_REG_NO(no)                                      \
+    do {                                                          \
+        if ((uint32)no >= sizeof(regs_i64) / sizeof(regs_i64[0])) \
+            GOTO_FAIL;                                            \
+    } while (0)
+
+#define CHECK_F32_REG_NO(no)                                          \
+    do {                                                              \
+        if ((uint32)no >= sizeof(regs_float) / sizeof(regs_float[0])) \
+            GOTO_FAIL;                                                \
+    } while (0)
+
+#define CHECK_F64_REG_NO(no)                                          \
+    do {                                                              \
+        if ((uint32)no >= sizeof(regs_float) / sizeof(regs_float[0])) \
+            GOTO_FAIL;                                                \
+    } while (0)
+
+/* Check if a register number is valid */
+#define CHECK_REG_NO(no, kind)                                           \
+    do {                                                                 \
+        if (kind == JIT_REG_KIND_I32 || kind == JIT_REG_KIND_I64) {      \
+            CHECK_I32_REG_NO(no);                                        \
+            CHECK_I64_REG_NO(no);                                        \
+        }                                                                \
+        else if (kind == JIT_REG_KIND_F32 || kind == JIT_REG_KIND_F64) { \
+            CHECK_F32_REG_NO(no);                                        \
+            CHECK_F64_REG_NO(no);                                        \
+        }                                                                \
+        else                                                             \
+            GOTO_FAIL;                                                   \
+    } while (0)
+
+#endif /* end of CODEGEN_CHECK_ARGS == 0 */
+
+/* Load one operand from insn and check none */
+#define LOAD_1ARG() r0 = *jit_insn_opnd(insn, 0)
+
+/* Load two operands from insn and check if r0 is non-const */
+#define LOAD_2ARGS()              \
+    r0 = *jit_insn_opnd(insn, 0); \
+    r1 = *jit_insn_opnd(insn, 1); \
+    CHECK_NCONST(r0)
+
+/* Load three operands from insn and check if r0 is non-const */
+#define LOAD_3ARGS()              \
+    r0 = *jit_insn_opnd(insn, 0); \
+    r1 = *jit_insn_opnd(insn, 1); \
+    r2 = *jit_insn_opnd(insn, 2); \
+    CHECK_NCONST(r0)
+
+/* Load three operands from insn and check none */
+#define LOAD_3ARGS_NO_ASSIGN()    \
+    r0 = *jit_insn_opnd(insn, 0); \
+    r1 = *jit_insn_opnd(insn, 1); \
+    r2 = *jit_insn_opnd(insn, 2);
+
+/* Load four operands from insn and check if r0 is non-const */
+#define LOAD_4ARGS()              \
+    r0 = *jit_insn_opnd(insn, 0); \
+    r1 = *jit_insn_opnd(insn, 1); \
+    r2 = *jit_insn_opnd(insn, 2); \
+    r3 = *jit_insn_opnd(insn, 3); \
+    CHECK_NCONST(r0)
+
+class JitErrorHandler : public ErrorHandler
+{
+  public:
+    Error err;
+
+    JitErrorHandler()
+      : err(kErrorOk)
+    {}
+
+    void handleError(Error e, const char *msg, BaseEmitter *base) override
+    {
+        (void)msg;
+        (void)base;
+        this->err = e;
+    }
+};
+
+/* Alu opcode */
+typedef enum { ADD, SUB, MUL, DIV_S, REM_S, DIV_U, REM_U, MIN, MAX } ALU_OP;
+/* Bit opcode */
+typedef enum { OR, XOR, AND } BIT_OP;
+/* Shift opcode */
+typedef enum { SHL, SHRS, SHRU, ROTL, ROTR } SHIFT_OP;
+/* Bitcount opcode */
+typedef enum { CLZ, CTZ, POPCNT } BITCOUNT_OP;
+/* Condition opcode */
+typedef enum { EQ, NE, GTS, GES, LTS, LES, GTU, GEU, LTU, LEU } COND_OP;
+
+typedef union _cast_float_to_integer {
+    float f;
+    uint32 i;
+} cast_float_to_integer;
+
+typedef union _cast_double_to_integer {
+    double d;
+    uint64 i;
+} cast_double_to_integer;
+
+static uint32
+local_log2(uint32 data)
+{
+    uint32 ret = 0;
+    while (data >>= 1) {
+        ret++;
+    }
+    return ret;
+}
+
+static uint64
+local_log2l(uint64 data)
+{
+    uint64 ret = 0;
+    while (data >>= 1) {
+        ret++;
+    }
+    return ret;
+}
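+
+/* Both helpers compute floor(log2(x)). A typical use, sketched here for a
+   power-of-two constant (the real call sites appear further down in this
+   file), is strength-reducing a multiply or divide by a constant into a
+   shift, e.g. lowering `n * 8` to `n << 3`:
+
+       a.shl(regs_i32[reg_no_dst], Imm(local_log2(8)));
+*/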
+
+/* Jmp type */
+typedef enum JmpType {
+    JMP_DST_LABEL_REL,     /* jmp to dst label with relative addr */
+    JMP_DST_LABEL_ABS,     /* jmp to dst label with absolute addr */
+    JMP_END_OF_CALLBC,     /* jmp to end of CALLBC */
+    JMP_LOOKUPSWITCH_BASE, /* LookupSwitch table base addr */
+} JmpType;
+
+/**
+ * Jmp info: saved on the first encoding pass; the offset is replaced
+ * with the exact offset once the code cache has actually been
+ * allocated.
+ */
+typedef struct JmpInfo {
+    bh_list_link link;
+    JmpType type;
+    uint32 label_src;
+    uint32 offset;
+    union {
+        uint32 label_dst;
+    } dst_info;
+} JmpInfo;
+
+static bool
+label_is_neighboring(JitCompContext *cc, int32 label_prev, int32 label_succ)
+{
+    return (label_prev == 0 && label_succ == 2)
+           || (label_prev >= 2 && label_succ == label_prev + 1)
+           || (label_prev == (int32)jit_cc_label_num(cc) - 1
+               && label_succ == 1);
+}
+
+static bool
+label_is_ahead(JitCompContext *cc, int32 label_dst, int32 label_src)
+{
+    return (label_dst == 0 && label_src != 0)
+           || (label_dst != 1 && label_src == 1)
+           || (2 <= label_dst && label_dst < label_src
+               && label_src <= (int32)jit_cc_label_num(cc) - 1);
+}
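+
+/* Both predicates encode the block layout the codegen emits, as inferred
+   from the two checks: label 0 (the entry) comes first, labels 2 .. n-1
+   (the body blocks) follow in index order, and label 1 (the exit) is
+   emitted last. For example, with jit_cc_label_num(cc) == 5:
+       emission order:    0, 2, 3, 4, 1
+       neighboring pairs: (0,2), (2,3), (3,4), (4,1) */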
+
+/**
+ * Encode jumping from one label to the other label
+ *
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_dst the index of dst label
+ * @param label_src the index of src label
+ *
+ * @return true if success, false if failed
+ */
+static bool
+jmp_from_label_to_label(x86::Assembler &a, bh_list *jmp_info_list,
+                        int32 label_dst, int32 label_src)
+{
+    Imm imm(INT32_MAX);
+    JmpInfo *node;
+
+    node = (JmpInfo *)jit_calloc(sizeof(JmpInfo));
+    if (!node)
+        return false;
+
+    node->type = JMP_DST_LABEL_REL;
+    node->label_src = label_src;
+    node->dst_info.label_dst = label_dst;
+    node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+    bh_list_insert(jmp_info_list, node);
+
+    a.jmp(imm);
+    return true;
+}
+
+/**
+ * Encode checking the compare result register according to the condition
+ * code and then jumping to the suitable label when the condition is met
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_src the index of src label
+ * @param op the opcode of condition operation
+ * @param r1 the label info when condition is met
+ * @param r2 the label info when the condition is unmet, do nothing if VOID
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
+                    bh_list *jmp_info_list, int32 label_src, COND_OP op,
+                    JitReg r1, JitReg r2, bool is_last_insn)
+{
+    Imm imm(INT32_MAX);
+    JmpInfo *node;
+
+    node = (JmpInfo *)jit_malloc(sizeof(JmpInfo));
+    if (!node)
+        return false;
+
+    node->type = JMP_DST_LABEL_REL;
+    node->label_src = label_src;
+    node->dst_info.label_dst = jit_reg_no(r1);
+    node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+    bh_list_insert(jmp_info_list, node);
+
+    bool fp_cmp = cc->last_cmp_on_fp;
+
+    bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES)));
+
+    switch (op) {
+        case EQ:
+        {
+            a.je(imm);
+            break;
+        }
+        case NE:
+        {
+            a.jne(imm);
+            break;
+        }
+        case GTS:
+        {
+            if (fp_cmp)
+                a.ja(imm);
+            else
+                a.jg(imm);
+            break;
+        }
+        case LES:
+        {
+            a.jng(imm);
+            break;
+        }
+        case GES:
+        {
+            if (fp_cmp)
+                a.jae(imm);
+            else
+                a.jnl(imm);
+            break;
+        }
+        case LTS:
+        {
+            a.jl(imm);
+            break;
+        }
+        case GTU:
+        {
+            a.ja(imm);
+            break;
+        }
+        case LEU:
+        {
+            a.jna(imm);
+            break;
+        }
+        case GEU:
+        {
+            a.jnb(imm);
+            break;
+        }
+        case LTU:
+        {
+            a.jb(imm);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            break;
+        }
+    }
+
+    if (r2) {
+        int32 label_dst = jit_reg_no(r2);
+        if (!(is_last_insn && label_is_neighboring(cc, label_src, label_dst)))
+            if (!jmp_from_label_to_label(a, jmp_info_list, label_dst,
+                                         label_src))
+                return false;
+    }
+
+    return true;
+}
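+
+/* The fp_cmp special case above reflects how x86 reports floating-point
+   compare results: UCOMISS/UCOMISD set CF/ZF (as an unsigned integer
+   compare would) rather than SF/OF, so "greater" after an fp compare must
+   use the unsigned-above conditions. A minimal sketch, assuming the
+   compare was emitted earlier as ucomisd:
+
+       a.ucomisd(regs_float[0], regs_float[1]); // sets ZF/PF/CF
+       a.ja(imm);  // fp '>' : taken when CF == 0 && ZF == 0
+       a.jae(imm); // fp '>=': taken when CF == 0
+*/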
+
+#if WASM_ENABLE_FAST_JIT_DUMP != 0
+static void
+dump_native(char *data, uint32 length)
+{
+    /* Initialize decoder context */
+    ZydisDecoder decoder;
+    ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64,
+                     ZYDIS_STACK_WIDTH_64);
+
+    /* Initialize formatter */
+    ZydisFormatter formatter;
+    ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL);
+
+    /* Loop over the instructions in our buffer */
+    ZyanU64 runtime_address = (ZyanU64)(uintptr_t)data;
+    ZyanUSize offset = 0;
+    ZydisDecodedInstruction instruction;
+    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT_VISIBLE];
+
+    while (ZYAN_SUCCESS(ZydisDecoderDecodeFull(
+        &decoder, data + offset, length - offset, &instruction, operands,
+        ZYDIS_MAX_OPERAND_COUNT_VISIBLE, ZYDIS_DFLAG_VISIBLE_OPERANDS_ONLY))) {
+        /* Print current instruction pointer */
+        os_printf("%012" PRIX64 "  ", runtime_address);
+
+        /* Format & print the binary instruction structure to
+           human readable format */
+        char buffer[256];
+        ZydisFormatterFormatInstruction(&formatter, &instruction, operands,
+                                        instruction.operand_count_visible,
+                                        buffer, sizeof(buffer),
+                                        runtime_address);
+        puts(buffer);
+
+        offset += instruction.length;
+        runtime_address += instruction.length;
+    }
+}
+#endif
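+
+/* dump_native() is the Zydis-backed disassembler used when
+   WASM_ENABLE_FAST_JIT_DUMP is enabled. A hedged usage sketch, assuming
+   `code` and `size` point at a freshly emitted region of the code cache:
+
+       dump_native((char *)code, (uint32)size);
+*/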
+
+/**
+ * Encode extending register of byte to register of dword
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ * @param is_signed the data is signed or unsigned
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+extend_r8_to_r32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                 bool is_signed)
+{
+    if (is_signed) {
+        a.movsx(regs_i32[reg_no_dst], regs_i8[reg_no_src]);
+    }
+    else {
+        a.movzx(regs_i32[reg_no_dst], regs_i8[reg_no_src]);
+    }
+    return true;
+}
+/**
+ * Encode extending register of word to register of dword
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src tho no of src register
+ * @param is_signed the data is signed or unsigned
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+extend_r16_to_r32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                  bool is_signed)
+{
+    if (is_signed) {
+        a.movsx(regs_i32[reg_no_dst], regs_i16[reg_no_src]);
+    }
+    else {
+        a.movzx(regs_i32[reg_no_dst], regs_i16[reg_no_src]);
+    }
+    return true;
+}
+
+/**
+ * Encode extending register of byte to register of qword
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ * @param is_signed the data is signed or unsigned
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+extend_r8_to_r64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                 bool is_signed)
+{
+    if (is_signed) {
+        a.movsx(regs_i64[reg_no_dst], regs_i8[reg_no_src]);
+    }
+    else {
+        a.movzx(regs_i64[reg_no_dst], regs_i8[reg_no_src]);
+    }
+    return true;
+}
+
+/**
+ * Encode extending register of word to register of qword
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ * @param is_signed the data is signed or unsigned
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+extend_r16_to_r64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                  bool is_signed)
+{
+    if (is_signed) {
+        a.movsx(regs_i64[reg_no_dst], regs_i16[reg_no_src]);
+    }
+    else {
+        a.movzx(regs_i64[reg_no_dst], regs_i16[reg_no_src]);
+    }
+    return true;
+}
+
+/**
+ * Encode extending register of dword to register of qword
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ * @param is_signed the data is signed or unsigned
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+extend_r32_to_r64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                  bool is_signed)
+{
+    if (is_signed) {
+        a.movsxd(regs_i64[reg_no_dst], regs_i32[reg_no_src]);
+    }
+    else {
+        /*
+         * The upper 32 bits will be zero-extended; ref to Intel document,
+         * 3.4.1.1 General-Purpose Registers: 32-bit operands generate
+         * a 32-bit result, zero-extended to a 64-bit result in the
+         * destination general-purpose register
+         */
+        a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    }
+    return true;
+}
+
+static bool
+mov_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src);
+
+static bool
+mov_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src);
+
+static void
+mov_r_to_r(x86::Assembler &a, uint32 kind_dst, int32 reg_no_dst,
+           int32 reg_no_src)
+{
+    if (kind_dst == JIT_REG_KIND_I32)
+        mov_r_to_r_i32(a, reg_no_dst, reg_no_src);
+    else if (kind_dst == JIT_REG_KIND_I64)
+        mov_r_to_r_i64(a, reg_no_dst, reg_no_src);
+    else if (kind_dst == JIT_REG_KIND_F32) {
+        /* TODO */
+        bh_assert(0);
+    }
+    else if (kind_dst == JIT_REG_KIND_F64) {
+        /* TODO */
+        bh_assert(0);
+    }
+    else {
+        bh_assert(0);
+    }
+}
+
+/**
+ * Encode moving memory to a register
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64),
+ *        skipped by float and double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed whether the data is signed or unsigned
+ * @param reg_no_dst the index of dest register
+ * @param m_src the memory operand which contains the source data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_m_to_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, bool is_signed,
+           int32 reg_no_dst, x86::Mem &m_src)
+{
+    if (kind_dst == JIT_REG_KIND_I32) {
+        switch (bytes_dst) {
+            case 1:
+            case 2:
+                if (is_signed)
+                    a.movsx(regs_i32[reg_no_dst], m_src);
+                else
+                    a.movzx(regs_i32[reg_no_dst], m_src);
+                break;
+            case 4:
+                a.mov(regs_i32[reg_no_dst], m_src);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_I64) {
+        switch (bytes_dst) {
+            case 1:
+            case 2:
+                if (is_signed)
+                    a.movsx(regs_i64[reg_no_dst], m_src);
+                else
+                    a.movzx(regs_i64[reg_no_dst], m_src);
+                break;
+            case 4:
+                if (is_signed)
+                    a.movsxd(regs_i64[reg_no_dst], m_src);
+                else
+                    /*
+                     * The upper 32 bits will be zero-extended; ref to Intel
+                     * document, 3.4.1.1 General-Purpose Registers: 32-bit
+                     * operands generate a 32-bit result, zero-extended to
+                     * a 64-bit result in the destination general-purpose
+                     * register
+                     */
+                    a.mov(regs_i32[reg_no_dst], m_src);
+                break;
+            case 8:
+                a.mov(regs_i64[reg_no_dst], m_src);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_F32) {
+        a.movss(regs_float[reg_no_dst], m_src);
+    }
+    else if (kind_dst == JIT_REG_KIND_F64) {
+        a.movsd(regs_float[reg_no_dst], m_src);
+    }
+    return true;
+}
+
+/**
+ * Encode moving register to memory
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64),
+ *        skipped by float and double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed whether the data is signed or unsigned
+ * @param m_dst the dest memory operand
+ * @param reg_no_src the index of dest register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_r_to_m(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+           x86::Mem &m_dst, int32 reg_no_src)
+{
+    if (kind_dst == JIT_REG_KIND_I32) {
+        bh_assert(reg_no_src < 16);
+        switch (bytes_dst) {
+            case 1:
+                a.mov(m_dst, regs_i8[reg_no_src]);
+                break;
+            case 2:
+                a.mov(m_dst, regs_i16[reg_no_src]);
+                break;
+            case 4:
+                a.mov(m_dst, regs_i32[reg_no_src]);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_I64) {
+        bh_assert(reg_no_src < 16);
+        switch (bytes_dst) {
+            case 1:
+                a.mov(m_dst, regs_i8[reg_no_src]);
+                break;
+            case 2:
+                a.mov(m_dst, regs_i16[reg_no_src]);
+                break;
+            case 4:
+                a.mov(m_dst, regs_i32[reg_no_src]);
+                break;
+            case 8:
+                a.mov(m_dst, regs_i64[reg_no_src]);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_F32) {
+        a.movss(m_dst, regs_float[reg_no_src]);
+    }
+    else if (kind_dst == JIT_REG_KIND_F64) {
+        a.movsd(m_dst, regs_float[reg_no_src]);
+    }
+    return true;
+}
+
+/**
+ * Encode moving immediate data to memory
+ *
+ * @param a the assembler to emit the code
+ * @param m_dst the dst memory operand
+ * @param imm_src the src immediate data
+ * @param bytes_dst the bytes number of the data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_imm_to_m(x86::Assembler &a, x86::Mem &m_dst, Imm imm_src, uint32 bytes_dst)
+{
+    if (bytes_dst == 8) {
+        int64 value = imm_src.value();
+        if (value >= INT32_MIN && value <= INT32_MAX) {
+            imm_src.setValue((int32)value);
+            a.mov(m_dst, imm_src);
+        }
+        else {
+            /* There is no instruction `MOV m64, imm64`, we use
+               two instructions to implement it */
+            a.mov(regs_i64[REG_I64_FREE_IDX], imm_src);
+            a.mov(m_dst, regs_i64[REG_I64_FREE_IDX]);
+        }
+    }
+    else
+        a.mov(m_dst, imm_src);
+    return true;
+}
+
+/**
+ * Encode loading register data from memory with imm base and imm offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ * float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed the data is signed or unsigned
+ * @param reg_no_dst the index of dest register
+ * @param base the base address of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+ld_r_from_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                              uint32 kind_dst, bool is_signed, int32 reg_no_dst,
+                              int32 base, int32 offset)
+{
+    x86::Mem m((uintptr_t)(base + offset), bytes_dst);
+    return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, m);
+}
+
+/**
+ * Encode loading register data from memory with imm base and register offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ * float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed the data is signed or unsigned
+ * @param reg_no_dst the index of dest register
+ * @param base the base address of the memory
+ * @param reg_no_offset the no of register which stores the offset of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+ld_r_from_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                            uint32 kind_dst, bool is_signed, int32 reg_no_dst,
+                            int32 base, int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_offset], base, bytes_dst);
+    return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, m);
+}
+
+/**
+ * Encode loading register data from memory with register base and imm offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ * float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed the data is signed or unsigned
+ * @param reg_no_dst the index of dest register
+ * @param reg_no_base the no of register which stores the base of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+ld_r_from_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                            uint32 kind_dst, bool is_signed, int32 reg_no_dst,
+                            int32 reg_no_base, int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, m);
+}
+
+/**
+ * Encode loading register data from memory with register base and register
+ * offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ * float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed the data is signed or unsigned
+ * @param reg_no_dst the index of dest register
+ * @param reg_no_base the no of register which stores the base of the memory
+ * @param reg_no_offset the no of register which stores the offset of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+ld_r_from_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+                          bool is_signed, int32 reg_no_dst, int32 reg_no_base,
+                          int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, m);
+}
+
+/**
+ * Encode storing register data to memory with imm base and imm offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ * float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param reg_no_src the index of src register
+ * @param base the base address of the dst memory
+ * @param offset the offset address of the dst memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_r_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                            uint32 kind_dst, int32 reg_no_src, int32 base,
+                            int32 offset)
+{
+    x86::Mem m((uintptr_t)(base + offset), bytes_dst);
+    return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src);
+}
+
+/**
+ * Encode storing register data to memory with imm base and register offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ * float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param reg_no_src the index of src register
+ * @param base the base address of the dst memory
+ * @param reg_no_offset the no of register which stores the offset of the dst
+ * memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_r_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+                          int32 reg_no_src, int32 base, int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_offset], base, bytes_dst);
+    return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src);
+}
+
+/**
+ * Encode storing register data to memory with register base and imm offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ * float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param reg_no_src the index of src register
+ * @param reg_no_base the no of register which stores the base of the dst memory
+ * @param offset the offset address of the dst memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_r_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+                          int32 reg_no_src, int32 reg_no_base, int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src);
+}
+
+/**
+ * Encode storing register data to memory with register base and register offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ * float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param reg_no_src the index of src register
+ * @param reg_no_base the no of register which stores the base of the dst memory
+ * @param reg_no_offset the no of register which stores the offset of the dst
+ * memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_r_to_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+                        int32 reg_no_src, int32 reg_no_base,
+                        int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    return mov_r_to_m(a, bytes_dst, kind_dst, m, reg_no_src);
+}
+
+static void
+imm_set_value(Imm &imm, void *data, uint32 bytes)
+{
+    switch (bytes) {
+        case 1:
+            imm.setValue(*(uint8 *)data);
+            break;
+        case 2:
+            imm.setValue(*(uint16 *)data);
+            break;
+        case 4:
+            imm.setValue(*(uint32 *)data);
+            break;
+        case 8:
+            imm.setValue(*(uint64 *)data);
+            break;
+        default:
+            bh_assert(0);
+    }
+}
+
+/**
+ * Encode storing int32 imm data to memory with imm base and imm offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param data_src the src immediate data
+ * @param base the base address of dst memory
+ * @param offset the offset address of dst memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_imm_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                              void *data_src, int32 base, int32 offset)
+{
+    x86::Mem m((uintptr_t)(base + offset), bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    return mov_imm_to_m(a, m, imm, bytes_dst);
+}
+
+/**
+ * Encode storing int32 imm data to memory with imm base and reg offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param data_src the src immediate data
+ * @param base the base address of dst memory
+ * @param reg_no_offset the no of register that stores the offset address
+ *        of dst memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_imm_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src,
+                            int32 base, int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_offset], base, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    return mov_imm_to_m(a, m, imm, bytes_dst);
+}
+
+/**
+ * Encode storing int32 imm data to memory with reg base and imm offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param data_src the src immediate data
+ * @param reg_no_base the no of register that stores the base address
+ *        of dst memory
+ * @param offset the offset address of dst memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_imm_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, void *data_src,
+                            int32 reg_no_base, int32 offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    return mov_imm_to_m(a, m, imm, bytes_dst);
+}
+
+/**
+ * Encode storing int32 imm data to memory with reg base and reg offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param data_src the src immediate data
+ * @param reg_no_base the no of register that stores the base address
+ *        of dst memory
+ * @param reg_no_offset the no of register that stores the offset address
+ *        of dst memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_imm_to_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src,
+                          int32 reg_no_base, int32 reg_no_offset)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+    return mov_imm_to_m(a, m, imm, bytes_dst);
+}
+
+/**
+ * Encode moving immediate int32 data to register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register
+ * @param data the immediate data to move
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_imm_to_r_i32(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    Imm imm(data);
+    a.mov(regs_i32[reg_no], imm);
+    return true;
+}
+
+/**
+ * Encode moving int32 data from src register to dst register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src)
+        a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode moving immediate int64 data to register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register
+ * @param data the immediate data to move
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_imm_to_r_i64(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    Imm imm(data);
+    a.mov(regs_i64[reg_no], imm);
+    return true;
+}
+
+/**
+ * Encode moving int64 data from src register to dst register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src)
+        a.mov(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode moving immediate float data to register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register
+ * @param data the immediate data to move
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_imm_to_r_f32(x86::Assembler &a, int32 reg_no, float data)
+{
+    /* imm -> gp -> xmm */
+    cast_float_to_integer v = { .f = data };
+    Imm imm(v.i);
+    a.mov(regs_i32[REG_I32_FREE_IDX], imm);
+    a.movd(regs_float[reg_no], regs_i32[REG_I32_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode moving float data from src register to dst register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.movss(regs_float[reg_no_dst], regs_float[reg_no_src]);
+    }
+    return true;
+}
+
+/**
+ * Encode moving immediate double data to register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register
+ * @param data the immediate data to move
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_imm_to_r_f64(x86::Assembler &a, int32 reg_no, double data)
+{
+    cast_double_to_integer v = { .d = data };
+    Imm imm(v.i);
+    a.mov(regs_i64[REG_I32_FREE_IDX], imm);
+    /* REG_I32_FREE_IDX == REG_I64_FREE_IDX */
+    a.movq(regs_float[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode moving double data from src register to dst register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.movsd(regs_float[reg_no_dst], regs_float[reg_no_src]);
+    }
+    return true;
+}
+
+/* Let the compiler do the conversion job as much as possible */
+
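+/* Note: for many of the imm -> reg converters below, the conversion
+   itself happens in C at JIT compile time (e.g. the (int32)data cast in
+   convert_imm_i8_to_r_i32), so only a move of the already-converted
+   value needs to be emitted. */
+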
+/**
+ * Encode converting int8 immediate data to int32 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src int8 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i8_to_r_i32(x86::Assembler &a, int32 reg_no, int8 data)
+{
+    return mov_imm_to_r_i32(a, reg_no, (int32)data);
+}
+
+/**
+ * Encode converting int8 register data to int32 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i8_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    return extend_r8_to_r32(a, reg_no_dst, reg_no_src, true);
+}
+
+/**
+ * Encode converting int8 immediate data to int64 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src int8 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i8_to_r_i64(x86::Assembler &a, int32 reg_no, int8 data)
+{
+    return mov_imm_to_r_i64(a, reg_no, (int64)data);
+}
+
+/**
+ * Encode converting int8 register data to int64 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i8_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    return extend_r8_to_r64(a, reg_no_dst, reg_no_src, true);
+}
+
+/**
+ * Encode converting int16 immediate data to int32 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src int16 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i16_to_r_i32(x86::Assembler &a, int32 reg_no, int16 data)
+{
+    return mov_imm_to_r_i32(a, reg_no, (int32)data);
+}
+
+/**
+ * Encode converting int16 register data to int32 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i16_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    return extend_r16_to_r32(a, reg_no_dst, reg_no_src, true);
+}
+
+/**
+ * Encode converting int16 immediate data to int64 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src int16 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i16_to_r_i64(x86::Assembler &a, int32 reg_no, int16 data)
+{
+    return mov_imm_to_r_i64(a, reg_no, (int64)data);
+}
+
+/**
+ * Encode converting int16 register data to int64 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i16_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    return extend_r16_to_r64(a, reg_no_dst, reg_no_src, true);
+}
+
+/**
+ * Encode converting int32 immediate data to int8 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int8 register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_i8(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    /* a cast like (int32)(int8)data would sign-extend the low byte;
+       mask instead so the register holds the zero-extended low byte,
+       matching convert_r_i32_to_r_i8 below */
+    return mov_imm_to_r_i32(a, reg_no, data & 0x000000FF);
+}
+
+/**
+ * Encode converting int32 register data to int8 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int8 register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i32_to_r_i8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    mov_r_to_r_i32(a, reg_no_dst, reg_no_src);
+    a.and_(regs_i32[reg_no_dst], 0x000000FF);
+    return true;
+}
+
+/**
+ * Encode converting int32 immediate data to uint8 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst uint8 register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_u8(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    return mov_imm_to_r_i32(a, reg_no, (uint8)data);
+}
+
+/**
+ * Encode converting int32 register data to uint8 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst uint8 register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i32_to_r_u8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    return convert_r_i32_to_r_i8(a, reg_no_dst, reg_no_src);
+}
+
+/**
+ * Encode converting int32 immediate data to int16 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int16 register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_i16(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    /* a cast like (int32)(int16)data would sign-extend the low 16 bits;
+       mask instead so the register holds the zero-extended low 16 bits,
+       matching convert_r_i32_to_r_i16 below */
+    return mov_imm_to_r_i32(a, reg_no, data & 0x0000FFFF);
+}
+
+/**
+ * Encode converting int32 register data to int16 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int16 register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i32_to_r_i16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    mov_r_to_r_i32(a, reg_no_dst, reg_no_src);
+    a.and_(regs_i32[reg_no_dst], 0x0000FFFF);
+    return true;
+}
+
+/**
+ * Encode converting int32 immediate data to uint16 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst uint16 register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_u16(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    return mov_imm_to_r_i32(a, reg_no, (uint16)data);
+}
+
+/**
+ * Encode converting int32 register data to uint16 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst uint16 register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i32_to_r_u16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    return convert_r_i32_to_r_i16(a, reg_no_dst, reg_no_src);
+}
+
+/**
+ * Encode converting int32 immediate data to int64 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_i64(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    return mov_imm_to_r_i64(a, reg_no, (int64)data);
+}
+
+/**
+ * Encode converting int32 register data to int64 register with sign extension
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i32_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    return extend_r32_to_r64(a, reg_no_dst, reg_no_src, true);
+}
+
+/**
+ * Encode converting int32 register data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i32_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvtsi2ss(regs_float[reg_no_dst], regs_i32[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting int32 immediate data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_f32(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    mov_imm_to_r_i32(a, REG_I32_FREE_IDX, data);
+    return convert_r_i32_to_r_f32(a, reg_no, REG_I32_FREE_IDX);
+}
+
+/**
+ * Encode converting int32 register data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i32_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvtsi2sd(regs_float[reg_no_dst], regs_i32[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting int32 immediate data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate int32 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_f64(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    mov_imm_to_r_i32(a, REG_I32_FREE_IDX, data);
+    return convert_r_i32_to_r_f64(a, reg_no, REG_I32_FREE_IDX);
+}
+
+/**
+ * Encode converting int64 immediate data to int32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src immediate int64 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i64_to_r_i32(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    return mov_imm_to_r_i64(a, reg_no, (int32)data);
+}
+
+/**
+ * Encode converting int64 register data to int32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int32 register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i64_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    mov_r_to_r_i64(a, reg_no_dst, reg_no_src);
+    a.and_(regs_i64[reg_no_dst], 0x00000000FFFFFFFFLL);
+    return true;
+}
+
+/**
+ * Encode converting int64 immediate data to int8 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int8 register
+ * @param data the src immediate int64 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i64_to_r_i8(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    return mov_imm_to_r_i64(a, reg_no, (int8)data);
+}
+
+/**
+ * Encode converting int64 register data to int8 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int8 register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i64_to_r_i8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    mov_r_to_r_i64(a, reg_no_dst, reg_no_src);
+    a.and_(regs_i64[reg_no_dst], 0x00000000000000FFLL);
+    return true;
+}
+
+/**
+ * Encode converting int64 immediate data to int16 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int16 register
+ * @param data the src immediate int64 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i64_to_r_i16(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    return mov_imm_to_r_i64(a, reg_no, (int16)data);
+}
+
+/**
+ * Encode converting int64 register data to int16 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int16 register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i64_to_r_i16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    mov_r_to_r_i64(a, reg_no_dst, reg_no_src);
+    a.and_(regs_i64[reg_no_dst], 0x000000000000FFFFLL);
+    return true;
+}
+
+/**
+ * Encode converting uint32 immediate data to int64 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src immediate uint32 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_u32_to_r_i64(x86::Assembler &a, int32 reg_no, uint32 data)
+{
+    return mov_imm_to_r_i64(a, reg_no, (int64)(uint64)data);
+}
+
+/**
+ * Encode converting uint32 register data to int64 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src uint32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_u32_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    return extend_r32_to_r64(a, reg_no_dst, reg_no_src, false);
+}
+
+/**
+ * Encode converting uint32 immediate data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate uint32 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_u32_to_r_f32(x86::Assembler &a, int32 reg_no, uint32 data)
+{
+    mov_imm_to_r_i64(a, REG_I64_FREE_IDX, (int64)(uint64)data);
+    a.cvtsi2ss(regs_float[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting uint32 register data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src uint32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_u32_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    extend_r32_to_r64(a, REG_I64_FREE_IDX, reg_no_src, false);
+    a.cvtsi2ss(regs_float[reg_no_dst], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting uint32 immediate data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate uint32 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_u32_to_r_f64(x86::Assembler &a, int32 reg_no, uint32 data)
+{
+    mov_imm_to_r_i64(a, REG_I64_FREE_IDX, (int64)(uint64)data);
+    a.cvtsi2sd(regs_float[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting uint32 register data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src uint32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_u32_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    extend_r32_to_r64(a, REG_I64_FREE_IDX, reg_no_src, false);
+    a.cvtsi2sd(regs_float[reg_no_dst], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting int64 register data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i64_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvtsi2ss(regs_float[reg_no_dst], regs_i64[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting int64 immediate data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate int64 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i64_to_r_f32(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    mov_imm_to_r_i64(a, REG_I64_FREE_IDX, data);
+    return convert_r_i64_to_r_f32(a, reg_no, REG_I64_FREE_IDX);
+}
+
+/**
+ * Encode converting int64 register data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i64_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvtsi2sd(regs_float[reg_no_dst], regs_i64[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting int64 immediate data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate int64 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i64_to_r_f64(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    mov_imm_to_r_i64(a, REG_I64_FREE_IDX, data);
+    return convert_r_i64_to_r_f64(a, reg_no, REG_I64_FREE_IDX);
+}
+
+/**
+ * Encode converting float immediate data to int32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src immediate float data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f32_to_r_i32(x86::Assembler &a, int32 reg_no, float data)
+{
+    return mov_imm_to_r_i32(a, reg_no, (int32)data);
+}
+
+/**
+ * Encode converting float register data to int32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int32 register
+ * @param reg_no_src the no of src float register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_f32_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvttss2si(regs_i32[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting float immediate data to uint32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst uint32 register
+ * @param data the src immediate float data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f32_to_r_u32(x86::Assembler &a, int32 reg_no, float data)
+{
+    return mov_imm_to_r_i32(a, reg_no, (uint32)data);
+}
+
+/**
+ * Encode converting float register data to uint32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst uint32 register
+ * @param reg_no_src the no of src float register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_f32_to_r_u32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
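+    /* truncate into the full 64-bit register so that float values above
+       INT32_MAX still leave the correct unsigned 32-bit result in the
+       low 32 bits */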
+    a.cvttss2si(regs_i64[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting float immediate data to int64 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src immediate float data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f32_to_r_i64(x86::Assembler &a, int32 reg_no, float data)
+{
+    return mov_imm_to_r_i64(a, reg_no, (int64)data);
+}
+
+/**
+ * Encode converting float register data to int64 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src float register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_f32_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvttss2si(regs_i64[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting float immediate data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate float data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f32_to_r_f64(x86::Assembler &a, int32 reg_no, float data)
+{
+    return mov_imm_to_r_f64(a, reg_no, (double)data);
+}
+
+/**
+ * Encode converting float register data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src float register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_f32_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvtss2sd(regs_float[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting double immediate data to int32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src immediate double data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f64_to_r_i32(x86::Assembler &a, int32 reg_no, double data)
+{
+    return mov_imm_to_r_i32(a, reg_no, (int32)data);
+}
+
+/**
+ * Encode converting double register data to int32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int32 register
+ * @param reg_no_src the no of src double register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_f64_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvttsd2si(regs_i32[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting double immediate data to int64 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src immediate double data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f64_to_r_i64(x86::Assembler &a, int32 reg_no, double data)
+{
+    return mov_imm_to_r_i64(a, reg_no, (int64)data);
+}
+
+/**
+ * Encode converting double register data to int64 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src double register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_f64_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvttsd2si(regs_i64[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting double immediate data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate double data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f64_to_r_f32(x86::Assembler &a, int32 reg_no, double data)
+{
+    return mov_imm_to_r_f32(a, reg_no, (float)data);
+}
+
+/**
+ * Encode converting double register data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src double register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_f64_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvtsd2ss(regs_float[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode converting double immediate data to uint32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst uint32 register
+ * @param data the src immediate double data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f64_to_r_u32(x86::Assembler &a, int32 reg_no, double data)
+{
+    return mov_imm_to_r_i32(a, reg_no, (uint32)data);
+}
+
+/**
+ * Encode converting double register data to uint32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst uint32 register
+ * @param reg_no_src the no of src double register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_f64_to_r_u32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.cvttsd2si(regs_i64[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode making negative from int32 immediate data to int32 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_imm_to_r_i32(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    Imm imm(-data);
+    a.mov(regs_i32[reg_no], imm);
+    return true;
+}
+
+/**
+ * Encode making negative from int32 register to int32 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    mov_r_to_r_i32(a, reg_no_dst, reg_no_src);
+    a.neg(regs_i32[reg_no_dst]);
+    return true;
+}
+
+/**
+ * Encode making negative from int64 immediate data to int64 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register
+ * @param data the src int64 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_imm_to_r_i64(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    Imm imm(-data);
+    a.mov(regs_i64[reg_no], imm);
+    return true;
+}
+
+/**
+ * Encode making negative from int64 register to int64 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    mov_r_to_r_i64(a, reg_no_dst, reg_no_src);
+    a.neg(regs_i64[reg_no_dst]);
+    return true;
+}
+
+/**
+ * Encode making negative from float immediate data to float register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src float immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_imm_to_r_f32(x86::Assembler &a, int32 reg_no, float data)
+{
+    bh_assert(0);
+    (void)a;
+    (void)reg_no;
+    (void)data;
+    return false;
+}
+
+/**
+ * Encode making negative from float register to float register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    bh_assert(0);
+    (void)a;
+    (void)reg_no_dst;
+    (void)reg_no_src;
+    return false;
+}
+
+/**
+ * Encode making negative from double immediate data to double register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src double immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_imm_to_r_f64(x86::Assembler &a, int32 reg_no, double data)
+{
+    bh_assert(0);
+    (void)a;
+    (void)reg_no;
+    (void)data;
+    return false;
+}
+
+/**
+ * Encode making negative from double register to double register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src double register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    bh_assert(0);
+    (void)a;
+    (void)reg_no_dst;
+    (void)reg_no_src;
+    return false;
+}
+
+static COND_OP
+not_cond(COND_OP op)
+{
+    COND_OP not_list[] = { NE, EQ, LES, LTS, GES, GTS, LEU, LTU, GEU, GTU };
+
+    bh_assert(op <= LEU);
+    return not_list[op];
+}
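+
+/* Note: not_list assumes the COND_OP enumerators are declared in the
+   order EQ, NE, GTS, GES, LTS, LES, GTU, GEU, LTU, LEU, so that e.g.
+   not_cond(EQ) yields NE and not_cond(GTS) yields LES */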
+
+/**
+ * Encode int32 alu operation of reg and data, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of dst register to save the result
+ * @param reg_no_src the no of src register, as the first operand
+ * @param data the immediate data, as the second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_imm_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                int32 reg_no_src, int32 data)
+{
+    Imm imm(data);
+
+    switch (op) {
+        case ADD:
+            mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+            if (data == 1)
+                a.inc(regs_i32[reg_no_dst]);
+            else if (data == -1)
+                a.dec(regs_i32[reg_no_dst]);
+            else if (data != 0)
+                a.add(regs_i32[reg_no_dst], imm);
+            break;
+        case SUB:
+            mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+            if (data == -1)
+                a.inc(regs_i32[reg_no_dst]);
+            else if (data == 1)
+                a.dec(regs_i32[reg_no_dst]);
+            else if (data != 0)
+                a.sub(regs_i32[reg_no_dst], imm);
+            break;
+        case MUL:
+            if (data == 0)
+                a.xor_(regs_i32[reg_no_dst], regs_i32[reg_no_dst]);
+            else if (data == -1) {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+                a.neg(regs_i32[reg_no_dst]);
+            }
+            else if (data == 1) {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+            }
+            else if (data > 0 && (data & (data - 1)) == 0x0) {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+                data = (int32)local_log2(data);
+                imm.setValue(data);
+                a.shl(regs_i32[reg_no_dst], imm);
+            }
+            else {
+                a.imul(regs_i32[reg_no_dst], regs_i32[reg_no_src], imm);
+            }
+            break;
+        case DIV_S:
+        case REM_S:
+            bh_assert(reg_no_src == REG_EAX_IDX);
+            if (op == DIV_S) {
+                bh_assert(reg_no_dst == REG_EAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_EDX_IDX);
+            }
+            a.mov(regs_i32[REG_I32_FREE_IDX], imm);
+            /* sign-extend eax into edx:eax */
+            a.cdq();
+            a.idiv(regs_i32[REG_I32_FREE_IDX]);
+            break;
+        case DIV_U:
+        case REM_U:
+            bh_assert(reg_no_src == REG_EAX_IDX);
+            if (op == DIV_U) {
+                bh_assert(reg_no_dst == REG_EAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_EDX_IDX);
+            }
+            a.mov(regs_i32[REG_I32_FREE_IDX], imm);
+            /* zero-extend eax into edx:eax */
+            a.xor_(regs_i32[REG_EDX_IDX], regs_i32[REG_EDX_IDX]);
+            a.div(regs_i32[REG_I32_FREE_IDX]);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    return true;
+}
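+
+/* For example, alu_r_r_imm_i32(a, MUL, n, m, 8) is strength-reduced to
+   roughly `mov r_n, r_m; shl r_n, 3` since 8 is a power of two, while
+   ADD/SUB of 1 or -1 become a single inc/dec (register names are
+   illustrative) */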
+
+/**
+ * Encode int32 alu operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of dst register to save the result
+ * @param reg_no1_src the no of src register, as the first operand
+ * @param reg_no2_src the no of src register, as the second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src,
+              int32 reg_no2_src)
+{
+    switch (op) {
+        case ADD:
+            if (reg_no_dst != reg_no2_src) {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no1_src);
+                a.add(regs_i32[reg_no_dst], regs_i32[reg_no2_src]);
+            }
+            else
+                a.add(regs_i32[reg_no2_src], regs_i32[reg_no1_src]);
+            break;
+        case SUB:
+            if (reg_no_dst != reg_no2_src) {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no1_src);
+                a.sub(regs_i32[reg_no_dst], regs_i32[reg_no2_src]);
+            }
+            else {
+                a.sub(regs_i32[reg_no2_src], regs_i32[reg_no1_src]);
+                a.neg(regs_i32[reg_no2_src]);
+            }
+            break;
+        case MUL:
+            if (reg_no_dst != reg_no2_src) {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no1_src);
+                a.imul(regs_i32[reg_no_dst], regs_i32[reg_no2_src]);
+            }
+            else
+                a.imul(regs_i32[reg_no2_src], regs_i32[reg_no1_src]);
+            break;
+        case DIV_S:
+        case REM_S:
+            bh_assert(reg_no1_src == REG_EAX_IDX);
+            if (op == DIV_S) {
+                bh_assert(reg_no_dst == REG_EAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_EDX_IDX);
+                if (reg_no2_src == REG_EDX_IDX) {
+                    /* convert `REM_S edx, eax, edx` into
+                       `mov esi, edx` and `REM_S edx, eax, esi` to
+                       avoid overwriting edx when a.cdq() */
+                    a.mov(regs_i32[REG_I32_FREE_IDX], regs_i32[REG_EDX_IDX]);
+                    reg_no2_src = REG_I32_FREE_IDX;
+                }
+            }
+            /* sign-extend eax into edx:eax */
+            a.cdq();
+            a.idiv(regs_i32[reg_no2_src]);
+            break;
+        case DIV_U:
+        case REM_U:
+            bh_assert(reg_no1_src == REG_EAX_IDX);
+            if (op == DIV_U) {
+                bh_assert(reg_no_dst == REG_EAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_EDX_IDX);
+                if (reg_no2_src == REG_EDX_IDX) {
+                    /* convert `REM_U edx, eax, edx` into
+                       `mov esi, edx` and `REM_U edx, eax, esi` to
+                       avoid overwriting edx when zero-extending
+                       eax into edx:eax */
+                    a.mov(regs_i32[REG_I32_FREE_IDX], regs_i32[REG_EDX_IDX]);
+                    reg_no2_src = REG_I32_FREE_IDX;
+                }
+            }
+            /* zero-extend eax into edx:eax */
+            a.xor_(regs_i32[REG_EDX_IDX], regs_i32[REG_EDX_IDX]);
+            a.div(regs_i32[reg_no2_src]);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    return true;
+}
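+
+/* Note: x86 idiv/div implicitly consume edx:eax and write the quotient
+   to eax and the remainder to edx, which is why the asserts above pin
+   the first source to eax and the destination to eax (DIV) or edx
+   (REM) */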
+
+/**
+ * Encode int32 alu operation of imm and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_imm_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                     int32 data1_src, int32 data2_src)
+{
+    Imm imm;
+    int32 data = 0;
+
+    switch (op) {
+        case ADD:
+            data = data1_src + data2_src;
+            break;
+        case SUB:
+            data = data1_src - data2_src;
+            break;
+        case MUL:
+            data = data1_src * data2_src;
+            break;
+        case DIV_S:
+            data = data1_src / data2_src;
+            break;
+        case REM_S:
+            data = data1_src % data2_src;
+            break;
+        case DIV_U:
+            data = (uint32)data1_src / (uint32)data2_src;
+            break;
+        case REM_U:
+            data = (uint32)data1_src % (uint32)data2_src;
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    imm.setValue(data);
+    a.mov(regs_i32[reg_no_dst], imm);
+    return true;
+}
+
+/**
+ * Encode int32 alu operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_r_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 data1_src, int32 reg_no2_src)
+{
+    if (op == ADD || op == MUL)
+        return alu_r_r_imm_i32(a, op, reg_no_dst, reg_no2_src, data1_src);
+    else if (op == SUB) {
+        if (!alu_r_r_imm_i32(a, op, reg_no_dst, reg_no2_src, data1_src))
+            return false;
+        a.neg(regs_i32[reg_no_dst]);
+        return true;
+    }
+    else {
+        if (reg_no_dst != reg_no2_src) {
+            if (!mov_imm_to_r_i32(a, reg_no_dst, data1_src)
+                || !alu_r_r_r_i32(a, op, reg_no_dst, reg_no_dst, reg_no2_src))
+                return false;
+            return true;
+        }
+        else {
+            if (!mov_imm_to_r_i32(a, REG_I32_FREE_IDX, data1_src)
+                || !alu_r_r_r_i32(a, op, reg_no_dst, REG_I32_FREE_IDX,
+                                  reg_no2_src))
+                return false;
+            return true;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Encode int32 alu operation of reg and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_imm_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int32 data2_src)
+{
+    return alu_r_r_imm_i32(a, op, reg_no_dst, reg_no1_src, data2_src);
+}
+
+/**
+ * Encode int32 alu operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    return alu_r_r_r_i32(a, op, reg_no_dst, reg_no1_src, reg_no2_src);
+}
+
+/**
+ * Encode int64 alu operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of dst register to save the result
+ * @param reg_no1_src the no of src register, as the first operand
+ * @param reg_no2_src the no of src register, as the second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src,
+              int32 reg_no2_src)
+{
+    switch (op) {
+        case ADD:
+            if (reg_no_dst != reg_no2_src) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no1_src);
+                a.add(regs_i64[reg_no_dst], regs_i64[reg_no2_src]);
+            }
+            else
+                a.add(regs_i64[reg_no2_src], regs_i64[reg_no1_src]);
+            break;
+        case SUB:
+            if (reg_no_dst != reg_no2_src) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no1_src);
+                a.sub(regs_i64[reg_no_dst], regs_i64[reg_no2_src]);
+            }
+            else {
+                a.sub(regs_i64[reg_no2_src], regs_i64[reg_no1_src]);
+                a.neg(regs_i64[reg_no2_src]);
+            }
+            break;
+        case MUL:
+            if (reg_no_dst != reg_no2_src) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no1_src);
+                a.imul(regs_i64[reg_no_dst], regs_i64[reg_no2_src]);
+            }
+            else
+                a.imul(regs_i64[reg_no2_src], regs_i64[reg_no1_src]);
+            break;
+        case DIV_S:
+        case REM_S:
+            bh_assert(reg_no1_src == REG_RAX_IDX);
+            if (op == DIV_S) {
+                bh_assert(reg_no_dst == REG_RAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_RDX_IDX);
+            }
+            /* sign-extend rax into rdx:rax */
+            a.cqo();
+            a.idiv(regs_i64[reg_no2_src]);
+            break;
+        case DIV_U:
+        case REM_U:
+            bh_assert(reg_no1_src == REG_RAX_IDX);
+            if (op == DIV_U) {
+                bh_assert(reg_no_dst == REG_RAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_RDX_IDX);
+            }
+            /* zero-extend rax into rdx:rax */
+            a.xor_(regs_i64[REG_RDX_IDX], regs_i64[REG_RDX_IDX]);
+            a.div(regs_i64[reg_no2_src]);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    return true;
+}
+
+/**
+ * Encode int64 alu operation of reg and data, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of dst register to save the result
+ * @param reg_no_src the no of src register, as the first operand
+ * @param data the immediate data, as the second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_imm_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                int32 reg_no_src, int64 data)
+{
+    Imm imm(data);
+
+    switch (op) {
+        case ADD:
+            mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+            if (data == 1)
+                a.inc(regs_i64[reg_no_dst]);
+            else if (data == -1)
+                a.dec(regs_i64[reg_no_dst]);
+            else if (data != 0) {
+                if (data >= INT32_MIN && data <= INT32_MAX) {
+                    imm.setValue((int32)data);
+                    a.add(regs_i64[reg_no_dst], imm);
+                }
+                else {
+                    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+                    a.add(regs_i64[reg_no_dst], regs_i64[REG_I64_FREE_IDX]);
+                }
+            }
+            break;
+        case SUB:
+            mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+            if (data == -1)
+                a.inc(regs_i64[reg_no_dst]);
+            else if (data == 1)
+                a.dec(regs_i64[reg_no_dst]);
+            else if (data != 0) {
+                if (data >= INT32_MIN && data <= INT32_MAX) {
+                    imm.setValue((int32)data);
+                    a.sub(regs_i64[reg_no_dst], imm);
+                }
+                else {
+                    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+                    a.sub(regs_i64[reg_no_dst], regs_i64[REG_I64_FREE_IDX]);
+                }
+            }
+            break;
+        case MUL:
+            if (data == 0)
+                a.xor_(regs_i64[reg_no_dst], regs_i64[reg_no_dst]);
+            else if (data == -1) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+                a.neg(regs_i64[reg_no_dst]);
+            }
+            else if (data == 1) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+            }
+            else if (data > 0 && (data & (data - 1)) == 0x0) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+                data = (int64)local_log2l(data);
+                imm.setValue(data);
+                a.shl(regs_i64[reg_no_dst], imm);
+            }
+            else if (INT32_MIN <= data && data <= INT32_MAX) {
+                a.imul(regs_i64[reg_no_dst], regs_i64[reg_no_src], imm);
+            }
+            else {
+                mov_imm_to_r_i64(
+                    a, reg_no_dst == reg_no_src ? REG_I64_FREE_IDX : reg_no_dst,
+                    data);
+                alu_r_r_r_i64(a, op, reg_no_dst,
+                              reg_no_dst == reg_no_src ? REG_I64_FREE_IDX
+                                                       : reg_no_dst,
+                              reg_no_src);
+            }
+            break;
+        case DIV_S:
+        case REM_S:
+            bh_assert(reg_no_src == REG_RAX_IDX);
+            if (op == DIV_S) {
+                bh_assert(reg_no_dst == REG_RAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_RDX_IDX);
+            }
+            a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+            /* sign-extend rax into rdx:rax */
+            a.cqo();
+            a.idiv(regs_i64[REG_I64_FREE_IDX]);
+            break;
+        case DIV_U:
+        case REM_U:
+            bh_assert(reg_no_src == REG_RAX_IDX);
+            if (op == DIV_U) {
+                bh_assert(reg_no_dst == REG_RAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_RDX_IDX);
+            }
+            a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+            /* zero-extend rax into rdx:rax */
+            a.xor_(regs_i64[REG_RDX_IDX], regs_i64[REG_RDX_IDX]);
+            a.div(regs_i64[REG_I64_FREE_IDX]);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    return true;
+}
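+
+/* Note: 64-bit add/sub/imul can only encode a sign-extended 32-bit
+   immediate, which is why constants outside the int32 range are first
+   materialized into the free register above */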
+
+/**
+ * Encode int64 alu operation of imm and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_imm_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                     int64 data1_src, int64 data2_src)
+{
+    Imm imm;
+    int64 data = 0;
+
+    switch (op) {
+        case ADD:
+            data = data1_src + data2_src;
+            break;
+        case SUB:
+            data = data1_src - data2_src;
+            break;
+        case MUL:
+            data = data1_src * data2_src;
+            break;
+        case DIV_S:
+            data = data1_src / data2_src;
+            break;
+        case REM_S:
+            data = data1_src % data2_src;
+            break;
+        case DIV_U:
+            data = (uint64)data1_src / (uint64)data2_src;
+            break;
+        case REM_U:
+            data = (uint64)data1_src % (uint64)data2_src;
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    imm.setValue(data);
+    a.mov(regs_i64[reg_no_dst], imm);
+    return true;
+}
+
+/**
+ * Encode int64 alu operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_r_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int64 data1_src, int32 reg_no2_src)
+{
+    if (op == ADD || op == MUL)
+        return alu_r_r_imm_i64(a, op, reg_no_dst, reg_no2_src, data1_src);
+    else if (op == SUB) {
+        if (!alu_r_r_imm_i64(a, op, reg_no_dst, reg_no2_src, data1_src))
+            return false;
+        a.neg(regs_i64[reg_no_dst]);
+        return true;
+    }
+    else {
+        if (reg_no_dst != reg_no2_src) {
+            if (!mov_imm_to_r_i64(a, reg_no_dst, data1_src)
+                || !alu_r_r_r_i64(a, op, reg_no_dst, reg_no_dst, reg_no2_src))
+                return false;
+            return true;
+        }
+        else {
+            if (!mov_imm_to_r_i64(a, REG_I64_FREE_IDX, data1_src)
+                || !alu_r_r_r_i64(a, op, reg_no_dst, REG_I64_FREE_IDX,
+                                  reg_no2_src))
+                return false;
+            return true;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Encode int64 alu operation of reg and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_imm_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int64 data2_src)
+{
+    return alu_r_r_imm_i64(a, op, reg_no_dst, reg_no1_src, data2_src);
+}
+
+/**
+ * Encode int64 alu operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    return alu_r_r_r_i64(a, op, reg_no_dst, reg_no1_src, reg_no2_src);
+}
+
+/**
+ * Encode float alu operation of imm and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_imm_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                     float data1_src, float data2_src)
+{
+    Imm imm;
+    float data = 0;
+
+    switch (op) {
+        case ADD:
+        {
+            data = data1_src + data2_src;
+            break;
+        }
+        case SUB:
+        {
+            data = data1_src - data2_src;
+            break;
+        }
+        case MUL:
+        {
+            data = data1_src * data2_src;
+            break;
+        }
+        case DIV_S:
+        {
+            data = data1_src / data2_src;
+            break;
+        }
+        case MAX:
+        {
+            data = fmaxf(data1_src, data2_src);
+            break;
+        }
+        case MIN:
+        {
+            data = fminf(data1_src, data2_src);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            return false;
+        }
+    }
+
+    return mov_imm_to_r_f32(a, reg_no_dst, data);
+}
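+
+/* Both operands are known at JIT compile time here, so the operation is
+   constant-folded in C and only the result is materialized, e.g.
+   alu_imm_imm_to_r_f32(a, ADD, n, 1.0f, 2.0f) simply moves 3.0f into
+   the dst register */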
+
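+/**
+ * Encode float/double alu operation of a register and a memory operand,
+ * and save the result to the register
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no the no of register, as the first operand, to save the result
+ * @param m the memory operand, as the second operand
+ * @param is_f32 true for float (f32) operands, false for double (f64)
+ *
+ * @return true if success, false otherwise
+ */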
+static bool
+alu_r_m_float(x86::Assembler &a, ALU_OP op, int32 reg_no, x86::Mem &m,
+              bool is_f32)
+{
+    switch (op) {
+        case ADD:
+        {
+            if (is_f32)
+                a.addss(regs_float[reg_no], m);
+            else
+                a.addsd(regs_float[reg_no], m);
+            break;
+        }
+        case SUB:
+        {
+            if (is_f32)
+                a.subss(regs_float[reg_no], m);
+            else
+                a.subsd(regs_float[reg_no], m);
+            break;
+        }
+        case MUL:
+        {
+            if (is_f32)
+                a.mulss(regs_float[reg_no], m);
+            else
+                a.mulsd(regs_float[reg_no], m);
+            break;
+        }
+        case DIV_S:
+        {
+            if (is_f32)
+                a.divss(regs_float[reg_no], m);
+            else
+                a.divsd(regs_float[reg_no], m);
+            break;
+        }
+        case MAX:
+        {
+            if (is_f32)
+                a.maxss(regs_float[reg_no], m);
+            else
+                a.maxsd(regs_float[reg_no], m);
+            break;
+        }
+        case MIN:
+        {
+            if (is_f32)
+                a.minss(regs_float[reg_no], m);
+            else
+                a.minsd(regs_float[reg_no], m);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Encode float alu operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_r_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   float data1_src, int32 reg_no2_src)
+{
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    /* xmm -> m128 */
+    x86::Mem cache = x86::xmmword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
+                                      offsetof(WASMExecEnv, jit_cache));
+    a.movups(cache, regs_float[reg_no2_src]);
+
+    /* imm -> gp -> xmm */
+    mov_imm_to_r_f32(a, reg_no_dst, data1_src);
+
+    return alu_r_m_float(a, op, reg_no_dst, cache, true);
+}
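+
+/* SSE scalar arithmetic has no immediate operand form, so the register
+   operand is spilled to the exec_env's jit_cache scratch area and read
+   back as a memory operand, leaving the dst register free to hold the
+   materialized immediate */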
+
+/**
+ * Encode float alu operation of reg and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_imm_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, float data2_src)
+{
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    /* imm -> m32 */
+    x86::Mem cache = x86::dword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
+                                    offsetof(WASMExecEnv, jit_cache));
+    cast_float_to_integer v = { .f = data2_src };
+    Imm imm(v.i);
+    mov_imm_to_m(a, cache, imm, 4);
+
+    mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
+    return alu_r_m_float(a, op, reg_no_dst, cache, true);
+}
+
+/**
+ * Encode float alu operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    bool store_result = false;
+
+    /**
+     * - op r0,r0,r1. do nothing since instructions always store results in
+     *   the first register
+     *
+     * - op r1,r0,r1. use FREE_REG to cache and replace r0, and then store
+     *   results in r1
+     *
+     * - op r0,r1,r2. use r0 to cache and replace r1, and accept the result
+     *   naturally
+     **/
+    if (reg_no_dst == reg_no2_src) {
+        store_result = true;
+        reg_no_dst = REG_F32_FREE_IDX;
+    }
+    mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
+
+    switch (op) {
+        case ADD:
+        {
+            a.addss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case SUB:
+        {
+            a.subss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case MUL:
+        {
+            a.mulss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case DIV_S:
+        {
+            a.divss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case MAX:
+        {
+            a.maxss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case MIN:
+        {
+            a.minss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            return false;
+        }
+    }
+
+    if (store_result)
+        mov_r_to_r_f32(a, reg_no2_src, REG_F32_FREE_IDX);
+
+    return true;
+}
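+
+/* For example, in the `op r1, r0, r1` case (reg_no_dst == reg_no2_src),
+   an ADD emits roughly `movss xmm_free, xmm_r0; addss xmm_free, xmm_r1;
+   movss xmm_r1, xmm_free` (register names are illustrative) */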
+
+/**
+ * Encode double alu operation of imm and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_imm_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                     double data1_src, double data2_src)
+{
+    Imm imm;
+    double data = 0;
+
+    switch (op) {
+        case ADD:
+        {
+            data = data1_src + data2_src;
+            break;
+        }
+        case SUB:
+        {
+            data = data1_src - data2_src;
+            break;
+        }
+        case MUL:
+        {
+            data = data1_src * data2_src;
+            break;
+        }
+        case DIV_S:
+        {
+            data = data1_src / data2_src;
+            break;
+        }
+        case MAX:
+        {
+            data = fmax(data1_src, data2_src);
+            break;
+        }
+        case MIN:
+        {
+            data = fmin(data1_src, data2_src);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            return false;
+        }
+    }
+
+    return mov_imm_to_r_f64(a, reg_no_dst, data);
+}
+
+/**
+ * Encode double alu operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_r_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   double data1_src, int32 reg_no2_src)
+{
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    /* xmm -> m128 */
+    x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
+                                    offsetof(WASMExecEnv, jit_cache));
+    a.movupd(cache, regs_float[reg_no2_src]);
+
+    /* imm -> gp -> xmm */
+    mov_imm_to_r_f64(a, reg_no_dst, data1_src);
+
+    return alu_r_m_float(a, op, reg_no_dst, cache, false);
+}
+
+/**
+ * Encode double alu operation of reg and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_imm_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, double data2_src)
+{
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    /* imm -> m64 */
+    x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
+                                    offsetof(WASMExecEnv, jit_cache));
+    cast_double_to_integer v = { .d = data2_src };
+    Imm imm(v.i);
+    mov_imm_to_m(a, cache, imm, 8);
+
+    mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
+    return alu_r_m_float(a, op, reg_no_dst, cache, false);
+}
+
+/**
+ * Encode double alu operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    bool store_result = false;
+
+    /**
+     * - op r0,r0,r1. do nothing since instructions always store results in
+     *   the first register
+     *
+     * - op r1,r0,r1. use FREE_REG to cache and replace r0, and then store
+     *   results in r1
+     *
+     * - op r0,r1,r2. use r0 to cache and replace r1, and accept the result
+     *   naturally
+     **/
+    if (reg_no_dst == reg_no2_src) {
+        store_result = true;
+        reg_no_dst = REG_F64_FREE_IDX;
+    }
+    mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
+
+    switch (op) {
+        case ADD:
+        {
+            a.addsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case SUB:
+        {
+            a.subsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case MUL:
+        {
+            a.mulsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case DIV_S:
+        {
+            a.divsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case MAX:
+        {
+            a.maxsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        case MIN:
+        {
+            a.minsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            return false;
+        }
+    }
+
+    if (store_result)
+        mov_r_to_r_f64(a, reg_no2_src, REG_F64_FREE_IDX);
+
+    return true;
+}
+
+/**
+ * Encode int32 bit operation of reg and data, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no the no of register, as first operand, and save result
+ * @param data the immediate data, as the second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_imm_i32(x86::Assembler &a, BIT_OP op, int32 reg_no, int32 data)
+{
+    Imm imm(data);
+
+    switch (op) {
+        case OR:
+            if (data != 0)
+                a.or_(regs_i32[reg_no], imm);
+            break;
+        case XOR:
+            if (data == -1)
+                a.not_(regs_i32[reg_no]);
+            else if (data != 0)
+                a.xor_(regs_i32[reg_no], imm);
+            break;
+        case AND:
+            if (data != -1)
+                a.and_(regs_i32[reg_no], imm);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+    return true;
+}
+
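+/*
+ * The early-outs above are simple strength reductions, e.g.:
+ *   bit_r_imm_i32(a, OR,  n, 0)   emits nothing (x | 0 == x)
+ *   bit_r_imm_i32(a, AND, n, -1)  emits nothing (x & -1 == x)
+ *   bit_r_imm_i32(a, XOR, n, -1)  emits "not r32" instead of "xor r32, -1"
+ */
+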
+/**
+ * Encode int32 bit operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register, as first operand, and save result
+ * @param reg_no_src the no of register, as second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst, int32 reg_no_src)
+{
+    switch (op) {
+        case OR:
+            a.or_(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+            break;
+        case XOR:
+            a.xor_(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+            break;
+        case AND:
+            a.and_(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+    return true;
+}
+
+/**
+ * Encode int32 bit operation of imm and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_imm_imm_to_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                     int32 data1_src, int32 data2_src)
+{
+    Imm imm;
+
+    switch (op) {
+        case OR:
+            imm.setValue(data1_src | data2_src);
+            break;
+        case XOR:
+            imm.setValue(data1_src ^ data2_src);
+            break;
+        case AND:
+            imm.setValue(data1_src & data2_src);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    a.mov(regs_i32[reg_no_dst], imm);
+    return true;
+}
+
+/**
+ * Encode int32 bit operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_imm_r_to_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                   int32 data1_src, int32 reg_no2_src)
+{
+    if (op == AND && data1_src == 0)
+        a.xor_(regs_i32[reg_no_dst], regs_i32[reg_no_dst]);
+    else if (op == OR && data1_src == -1) {
+        Imm imm(-1);
+        a.mov(regs_i32[reg_no_dst], imm);
+    }
+    else {
+        mov_r_to_r_i32(a, reg_no_dst, reg_no2_src);
+        return bit_r_imm_i32(a, op, reg_no_dst, data1_src);
+    }
+    return true;
+}
+
+/**
+ * Encode int32 bit operation of reg and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_imm_to_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int32 data2_src)
+{
+    return bit_imm_r_to_r_i32(a, op, reg_no_dst, data2_src, reg_no1_src);
+}
+
+/**
+ * Encode int32 bit operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_r_to_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    if (reg_no_dst != reg_no2_src) {
+        mov_r_to_r_i32(a, reg_no_dst, reg_no1_src);
+        return bit_r_r_i32(a, op, reg_no_dst, reg_no2_src);
+    }
+    return bit_r_r_i32(a, op, reg_no_dst, reg_no1_src);
+}
+
+/**
+ * Encode int64 bit operation of reg and data, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no the no of register, as first operand, and save result
+ * @param data the immediate data, as the second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_imm_i64(x86::Assembler &a, BIT_OP op, int32 reg_no, int64 data)
+{
+    Imm imm(data);
+
+    switch (op) {
+        case OR:
+            if (data != 0) {
+                if (data >= INT32_MIN && data <= INT32_MAX) {
+                    imm.setValue((int32)data);
+                    a.or_(regs_i64[reg_no], imm);
+                }
+                else {
+                    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+                    a.or_(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX]);
+                }
+            }
+            break;
+        case XOR:
+            if (data == -1LL)
+                a.not_(regs_i64[reg_no]);
+            else if (data != 0) {
+                if (data >= INT32_MIN && data <= INT32_MAX) {
+                    imm.setValue((int32)data);
+                    a.xor_(regs_i64[reg_no], imm);
+                }
+                else {
+                    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+                    a.xor_(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX]);
+                }
+            }
+            break;
+        case AND:
+            if (data != -1LL) {
+                if (data >= INT32_MIN && data <= INT32_MAX) {
+                    imm.setValue((int32)data);
+                    a.and_(regs_i64[reg_no], imm);
+                }
+                else {
+                    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+                    a.and_(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX]);
+                }
+            }
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+    return true;
+}
+
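+/*
+ * Example of the immediate-width handling above for AND: an immediate that
+ * fits in a sign-extended imm32 encodes directly as
+ *   and r64, imm32
+ * while a full 64-bit immediate must go through the scratch GPR, since
+ * x86-64 has no "and r64, imm64" encoding:
+ *   mov r_free, imm64
+ *   and r64, r_free
+ */
+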
+/**
+ * Encode int64 bit operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register, as first operand, and save result
+ * @param reg_no_src the no of register, as second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst, int32 reg_no_src)
+{
+    switch (op) {
+        case OR:
+            a.or_(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+            break;
+        case XOR:
+            a.xor_(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+            break;
+        case AND:
+            a.and_(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+    return true;
+}
+
+/**
+ * Encode int64 bit operation of imm and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_imm_imm_to_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                     int64 data1_src, int64 data2_src)
+{
+    Imm imm;
+
+    switch (op) {
+        case OR:
+            imm.setValue(data1_src | data2_src);
+            break;
+        case XOR:
+            imm.setValue(data1_src ^ data2_src);
+            break;
+        case AND:
+            imm.setValue(data1_src & data2_src);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    a.mov(regs_i64[reg_no_dst], imm);
+    return true;
+}
+
+/**
+ * Encode int64 bit operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_imm_r_to_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                   int64 data1_src, int32 reg_no2_src)
+{
+    if (op == AND && data1_src == 0)
+        a.xor_(regs_i64[reg_no_dst], regs_i64[reg_no_dst]);
+    else if (op == OR && data1_src == -1LL) {
+        Imm imm(-1LL);
+        a.mov(regs_i64[reg_no_dst], imm);
+    }
+    else {
+        mov_r_to_r_i64(a, reg_no_dst, reg_no2_src);
+        return bit_r_imm_i64(a, op, reg_no_dst, data1_src);
+    }
+    return true;
+}
+
+/**
+ * Encode int64 bit operation of reg and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_imm_to_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int64 data2_src)
+{
+    return bit_imm_r_to_r_i64(a, op, reg_no_dst, data2_src, reg_no1_src);
+}
+
+/**
+ * Encode int64 bit operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_r_to_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    if (reg_no_dst != reg_no2_src) {
+        mov_r_to_r_i64(a, reg_no_dst, reg_no1_src);
+        return bit_r_r_i64(a, op, reg_no_dst, reg_no2_src);
+    }
+    return bit_r_r_i64(a, op, reg_no_dst, reg_no1_src);
+}
+
+/**
+ * Encode int32 shift operation of imm and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_imm_imm_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                       int32 data1_src, int32 data2_src)
+{
+    int32 data;
+
+    /* WASM shift/rotate counts are taken modulo 32; masking here also
+     * avoids undefined behavior in the constant folding below */
+    data2_src &= 31;
+
+    switch (op) {
+        case SHL:
+        {
+            data = data1_src << data2_src;
+            break;
+        }
+        case SHRS:
+        {
+            data = data1_src >> data2_src;
+            break;
+        }
+        case SHRU:
+        {
+            data = ((uint32)data1_src) >> data2_src;
+            break;
+        }
+        case ROTL:
+        {
+            data = (int32)((((uint32)data1_src) << data2_src)
+                           | (((uint32)data1_src) >> ((32 - data2_src) & 31)));
+            break;
+        }
+        case ROTR:
+        {
+            data = (int32)((((uint32)data1_src) >> data2_src)
+                           | (((uint32)data1_src) << ((32 - data2_src) & 31)));
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return mov_imm_to_r_i32(a, reg_no_dst, data);
+fail:
+    return false;
+}
+
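+/*
+ * With both operands constant the shift is folded at compile time and only
+ * a single "mov r32, imm32" is emitted. For example, op == ROTL with
+ * data1_src == 0x80000001 and data2_src == 1 folds to
+ *   (0x80000001 << 1) | (0x80000001 >> 31) == 0x00000003
+ */
+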
+/**
+ * Encode int32 shift operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_imm_r_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                     int32 data1_src, int32 reg_no2_src)
+{
+    /* Should have been optimized away by a previous lowering pass */
+    bh_assert(0);
+    (void)a;
+    (void)op;
+    (void)reg_no_dst;
+    (void)data1_src;
+    (void)reg_no2_src;
+    return false;
+}
+
+/**
+ * Encode int32 shift operation of reg and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_r_imm_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                     int32 reg_no1_src, int32 data2_src)
+{
+    /* SHL/SAR/SHR r/m32, imm8 */
+    Imm imm((uint8)data2_src);
+
+    mov_r_to_r_i32(a, reg_no_dst, reg_no1_src);
+    switch (op) {
+        case SHL:
+        {
+            a.shl(regs_i32[reg_no_dst], imm);
+            break;
+        }
+        case SHRS:
+        {
+            a.sar(regs_i32[reg_no_dst], imm);
+            break;
+        }
+        case SHRU:
+        {
+            a.shr(regs_i32[reg_no_dst], imm);
+            break;
+        }
+        case ROTL:
+        {
+            a.rol(regs_i32[reg_no_dst], imm);
+            break;
+        }
+        case ROTR:
+        {
+            a.ror(regs_i32[reg_no_dst], imm);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode int32 shift operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of shift operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_r_r_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int32 reg_no2_src)
+{
+    /* the variable shift count must be in CL */
+    if (reg_no2_src != REG_ECX_IDX)
+        return false;
+
+    mov_r_to_r_i32(a, reg_no_dst, reg_no1_src);
+
+    switch (op) {
+        case SHL:
+        {
+            a.shl(regs_i32[reg_no_dst], x86::cl);
+            break;
+        }
+        case SHRS:
+        {
+            a.sar(regs_i32[reg_no_dst], x86::cl);
+            break;
+        }
+        case SHRU:
+        {
+            a.shr(regs_i32[reg_no_dst], x86::cl);
+            break;
+        }
+        case ROTL:
+        {
+            a.rol(regs_i32[reg_no_dst], x86::cl);
+            break;
+        }
+        case ROTR:
+        {
+            a.ror(regs_i32[reg_no_dst], x86::cl);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
+
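+/*
+ * Variable shift counts on x86 must live in CL, hence the bail-out above
+ * unless reg_no2_src is ECX; the register allocator is expected to have
+ * pinned the count there. Using illustrative register names, a typical
+ * emitted sequence is
+ *   mov eax, esi
+ *   shl eax, cl
+ */
+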
+/**
+ * Encode int64 shift operation of imm and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_imm_imm_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                       int64 data1_src, int64 data2_src)
+{
+    int64 data;
+
+    /* WASM shift/rotate counts are taken modulo 64; masking here also
+     * avoids undefined behavior in the constant folding below */
+    data2_src &= 63;
+
+    switch (op) {
+        case SHL:
+        {
+            data = data1_src << data2_src;
+            break;
+        }
+        case SHRS:
+        {
+            data = data1_src >> data2_src;
+            break;
+        }
+        case SHRU:
+        {
+            data = ((uint64)data1_src) >> data2_src;
+            break;
+        }
+        case ROTL:
+        {
+            data = (int64)((((uint64)data1_src) << data2_src)
+                           | (((uint64)data1_src) >> ((64 - data2_src) & 63)));
+            break;
+        }
+        case ROTR:
+        {
+            data = (int64)((((uint64)data1_src) >> data2_src)
+                           | (((uint64)data1_src) << ((64 - data2_src) & 63)));
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return mov_imm_to_r_i64(a, reg_no_dst, data);
+fail:
+    return false;
+}
+
+/**
+ * Encode int64 shift operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_imm_r_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                     int64 data1_src, int32 reg_no2_src)
+{
+    /* Should have been optimized away by a previous lowering pass */
+    bh_assert(0);
+    (void)a;
+    (void)op;
+    (void)reg_no_dst;
+    (void)data1_src;
+    (void)reg_no2_src;
+    return false;
+}
+
+/**
+ * Encode int64 shift operation of reg and imm, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_r_imm_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                     int32 reg_no1_src, int64 data2_src)
+{
+    /* SHL/SAR/SHR r/m64, imm8 */
+    Imm imm((uint8)data2_src);
+
+    mov_r_to_r_i64(a, reg_no_dst, reg_no1_src);
+    switch (op) {
+        case SHL:
+        {
+            a.shl(regs_i64[reg_no_dst], imm);
+            break;
+        }
+        case SHRS:
+        {
+            a.sar(regs_i64[reg_no_dst], imm);
+            break;
+        }
+        case SHRU:
+        {
+            a.shr(regs_i64[reg_no_dst], imm);
+            break;
+        }
+        case ROTL:
+        {
+            a.rol(regs_i64[reg_no_dst], imm);
+            break;
+        }
+        case ROTR:
+        {
+            a.ror(regs_i64[reg_no_dst], imm);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode int64 shift operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of shift operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_r_r_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int32 reg_no2_src)
+{
+    /* the variable shift count must be in CL */
+    if (reg_no2_src != REG_ECX_IDX)
+        return false;
+
+    mov_r_to_r_i64(a, reg_no_dst, reg_no1_src);
+
+    switch (op) {
+        case SHL:
+        {
+            a.shl(regs_i64[reg_no_dst], x86::cl);
+            break;
+        }
+        case SHRS:
+        {
+            a.sar(regs_i64[reg_no_dst], x86::cl);
+            break;
+        }
+        case SHRU:
+        {
+            a.shr(regs_i64[reg_no_dst], x86::cl);
+            break;
+        }
+        case ROTL:
+        {
+            a.rol(regs_i64[reg_no_dst], x86::cl);
+            break;
+        }
+        case ROTR:
+        {
+            a.ror(regs_i64[reg_no_dst], x86::cl);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode int32 cmp operation of imm and imm, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_imm_imm_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 data1_src,
+                     int32 data2_src)
+{
+    Imm imm(data1_src);
+    a.mov(regs_i32[REG_I32_FREE_IDX], imm);
+    imm.setValue(data2_src);
+    a.cmp(regs_i32[REG_I32_FREE_IDX], imm);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode int32 cmp operation of imm and reg, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_imm_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 data1_src,
+                   int32 reg_no2_src)
+{
+    Imm imm(data1_src);
+    a.mov(regs_i32[REG_I32_FREE_IDX], imm);
+    a.cmp(regs_i32[REG_I32_FREE_IDX], regs_i32[reg_no2_src]);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode int32 cmp operation of reg and imm, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_r_imm_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                   int32 data2_src)
+{
+    Imm imm(data2_src);
+    a.cmp(regs_i32[reg_no1_src], imm);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode int32 cmp operation of reg and reg, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_r_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                 int32 reg_no2_src)
+{
+    a.cmp(regs_i32[reg_no1_src], regs_i32[reg_no2_src]);
+    (void)reg_no_dst;
+    return true;
+}
+
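+/*
+ * Note that none of the cmp helpers writes reg_no_dst: they only set
+ * EFLAGS (e.g. "cmp eax, ecx" here), which is presumably consumed by the
+ * subsequent SELECTcc/branch lowering; that is why reg_no_dst is
+ * explicitly voided.
+ */
+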
+/**
+ * Encode int64 cmp operation of imm and imm, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_imm_imm_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int64 data1_src,
+                     int64 data2_src)
+{
+    Imm imm(data1_src);
+    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+    imm.setValue(data2_src);
+    a.cmp(regs_i64[REG_I64_FREE_IDX], imm);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode int64 cmp operation of imm and reg, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_imm_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int64 data1_src,
+                   int32 reg_no2_src)
+{
+    Imm imm(data1_src);
+    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+    a.cmp(regs_i64[REG_I64_FREE_IDX], regs_i64[reg_no2_src]);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode int64 cmp operation of reg and imm, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_r_imm_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                   int64 data2_src)
+{
+    Imm imm(data2_src);
+
+    if (data2_src >= INT32_MIN && data2_src <= INT32_MAX) {
+        imm.setValue((int32)data2_src);
+        a.cmp(regs_i64[reg_no1_src], imm);
+    }
+    else {
+        a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+        a.cmp(regs_i64[reg_no1_src], regs_i64[REG_I64_FREE_IDX]);
+    }
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode int64 cmp operation of reg and reg, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_r_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                 int32 reg_no2_src)
+{
+    a.cmp(regs_i64[reg_no1_src], regs_i64[reg_no2_src]);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode float cmp operation of reg and reg, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_r_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                 int32 reg_no2_src)
+{
+    a.comiss(regs_float[reg_no1_src], regs_float[reg_no2_src]);
+    (void)reg_no_dst;
+    return true;
+}
+
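+/*
+ * comiss performs an ordered single-precision compare and maps the result
+ * onto ZF/PF/CF; an unordered result (either operand NaN) sets all three
+ * flags, which the flag-consuming lowering has to account for when
+ * implementing the WASM float comparisons.
+ */
+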
+/**
+ * Encode float cmp operation of imm and imm, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_imm_imm_to_r_f32(x86::Assembler &a, int32 reg_no_dst, float data1_src,
+                     float data2_src)
+{
+    /* should have been optimized in the frontend */
+    bh_assert(0);
+    (void)a;
+    (void)reg_no_dst;
+    (void)data1_src;
+    (void)data2_src;
+    return false;
+}
+
+/**
+ * Encode float cmp operation of imm and reg, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_imm_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, float data1_src,
+                   int32 reg_no2_src)
+{
+    mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data1_src);
+    a.comiss(regs_float[REG_F32_FREE_IDX], regs_float[reg_no2_src]);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode float cmp operation of reg and imm, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_r_imm_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                   float data2_src)
+{
+    mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data2_src);
+    a.comiss(regs_float[reg_no1_src], regs_float[REG_F32_FREE_IDX]);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode double cmp operation of reg and reg, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_r_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                 int32 reg_no2_src)
+{
+    a.comisd(regs_float[reg_no1_src], regs_float[reg_no2_src]);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode double cmp operation of imm and imm, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_imm_imm_to_r_f64(x86::Assembler &a, int32 reg_no_dst, double data1_src,
+                     double data2_src)
+{
+    /* should have been optimized in the frontend */
+    bh_assert(0);
+    (void)a;
+    (void)reg_no_dst;
+    (void)data1_src;
+    (void)data2_src;
+    return false;
+}
+
+/**
+ * Encode double cmp operation of imm and reg, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_imm_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, double data1_src,
+                   int32 reg_no2_src)
+{
+    mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data1_src);
+    a.comisd(regs_float[REG_F64_FREE_IDX], regs_float[reg_no2_src]);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode double cmp operation of reg and imm, and set the condition flags
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst unused, the comparison result is left in the flags
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cmp_r_imm_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                   double data2_src)
+{
+    mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data2_src);
+    a.comisd(regs_float[reg_no1_src], regs_float[REG_F64_FREE_IDX]);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Encode insn ld: LD_type r0, r1, r2
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param bytes_dst the byte number of dst data
+ * @param is_signed the data is signed or unsigned
+ */
+#define LD_R_R_R(kind, bytes_dst, is_signed)                                  \
+    do {                                                                      \
+        int32 reg_no_dst = 0, reg_no_base = 0, reg_no_offset = 0;             \
+        int32 base = 0, offset = 0;                                           \
+        bool _ret = false;                                                    \
+                                                                              \
+        if (jit_reg_is_const(r1)) {                                           \
+            CHECK_KIND(r1, JIT_REG_KIND_I32);                                 \
+        }                                                                     \
+        else {                                                                \
+            CHECK_KIND(r1, JIT_REG_KIND_I64);                                 \
+        }                                                                     \
+        if (jit_reg_is_const(r2)) {                                           \
+            CHECK_KIND(r2, JIT_REG_KIND_I32);                                 \
+        }                                                                     \
+        else {                                                                \
+            CHECK_KIND(r2, JIT_REG_KIND_I64);                                 \
+        }                                                                     \
+                                                                              \
+        reg_no_dst = jit_reg_no(r0);                                          \
+        CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));                           \
+        if (jit_reg_is_const(r1))                                             \
+            base = jit_cc_get_const_I32(cc, r1);                              \
+        else {                                                                \
+            reg_no_base = jit_reg_no(r1);                                     \
+            CHECK_REG_NO(reg_no_base, jit_reg_kind(r1));                      \
+        }                                                                     \
+        if (jit_reg_is_const(r2))                                             \
+            offset = jit_cc_get_const_I32(cc, r2);                            \
+        else {                                                                \
+            reg_no_offset = jit_reg_no(r2);                                   \
+            CHECK_REG_NO(reg_no_offset, jit_reg_kind(r2));                    \
+        }                                                                     \
+                                                                              \
+        if (jit_reg_is_const(r1)) {                                           \
+            if (jit_reg_is_const(r2))                                         \
+                _ret = ld_r_from_base_imm_offset_imm(                         \
+                    a, bytes_dst, JIT_REG_KIND_##kind, is_signed, reg_no_dst, \
+                    base, offset);                                            \
+            else                                                              \
+                _ret = ld_r_from_base_imm_offset_r(                           \
+                    a, bytes_dst, JIT_REG_KIND_##kind, is_signed, reg_no_dst, \
+                    base, reg_no_offset);                                     \
+        }                                                                     \
+        else if (jit_reg_is_const(r2))                                        \
+            _ret = ld_r_from_base_r_offset_imm(                               \
+                a, bytes_dst, JIT_REG_KIND_##kind, is_signed, reg_no_dst,     \
+                reg_no_base, offset);                                         \
+        else                                                                  \
+            _ret = ld_r_from_base_r_offset_r(                                 \
+                a, bytes_dst, JIT_REG_KIND_##kind, is_signed, reg_no_dst,     \
+                reg_no_base, reg_no_offset);                                  \
+        if (!_ret)                                                            \
+            GOTO_FAIL;                                                        \
+    } while (0)
+
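+/*
+ * Example expansion: LD_R_R_R(I32, 4, true) with a constant base r1 and a
+ * register offset r2 validates the operand kinds and then dispatches to
+ *   ld_r_from_base_imm_offset_r(a, 4, JIT_REG_KIND_I32, true,
+ *                               reg_no_dst, base, reg_no_offset);
+ * Note that the macro reads r0/r1/r2, cc and a from the enclosing lowering
+ * function.
+ */
+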
+/**
+ * Encode insn st: ST_type r0, r1, r2
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param type the data type of the src operand, such as int32, int64,
+ * float32 and float64
+ * @param bytes_dst the byte number of dst data
+ */
+#define ST_R_R_R(kind, type, bytes_dst)                                       \
+    do {                                                                      \
+        type data_src = 0;                                                    \
+        int32 reg_no_src = 0, reg_no_base = 0, reg_no_offset = 0;             \
+        int32 base = 0, offset = 0;                                           \
+        bool _ret = false;                                                    \
+                                                                              \
+        if (jit_reg_is_const(r1)) {                                           \
+            CHECK_KIND(r1, JIT_REG_KIND_I32);                                 \
+        }                                                                     \
+        else {                                                                \
+            CHECK_KIND(r1, JIT_REG_KIND_I64);                                 \
+        }                                                                     \
+        if (jit_reg_is_const(r2)) {                                           \
+            CHECK_KIND(r2, JIT_REG_KIND_I32);                                 \
+        }                                                                     \
+        else {                                                                \
+            CHECK_KIND(r2, JIT_REG_KIND_I64);                                 \
+        }                                                                     \
+                                                                              \
+        if (jit_reg_is_const(r0))                                             \
+            data_src = jit_cc_get_const_##kind(cc, r0);                       \
+        else {                                                                \
+            reg_no_src = jit_reg_no(r0);                                      \
+            CHECK_REG_NO(reg_no_src, jit_reg_kind(r0));                       \
+        }                                                                     \
+        if (jit_reg_is_const(r1))                                             \
+            base = jit_cc_get_const_I32(cc, r1);                              \
+        else {                                                                \
+            reg_no_base = jit_reg_no(r1);                                     \
+            CHECK_REG_NO(reg_no_base, jit_reg_kind(r1));                      \
+        }                                                                     \
+        if (jit_reg_is_const(r2))                                             \
+            offset = jit_cc_get_const_I32(cc, r2);                            \
+        else {                                                                \
+            reg_no_offset = jit_reg_no(r2);                                   \
+            CHECK_REG_NO(reg_no_offset, jit_reg_kind(r2));                    \
+        }                                                                     \
+                                                                              \
+        if (jit_reg_is_const(r0)) {                                           \
+            if (jit_reg_is_const(r1)) {                                       \
+                if (jit_reg_is_const(r2))                                     \
+                    _ret = st_imm_to_base_imm_offset_imm(                     \
+                        a, bytes_dst, &data_src, base, offset);               \
+                else                                                          \
+                    _ret = st_imm_to_base_imm_offset_r(                       \
+                        a, bytes_dst, &data_src, base, reg_no_offset);        \
+            }                                                                 \
+            else if (jit_reg_is_const(r2))                                    \
+                _ret = st_imm_to_base_r_offset_imm(a, bytes_dst, &data_src,   \
+                                                   reg_no_base, offset);      \
+            else                                                              \
+                _ret = st_imm_to_base_r_offset_r(a, bytes_dst, &data_src,     \
+                                                 reg_no_base, reg_no_offset); \
+        }                                                                     \
+        else if (jit_reg_is_const(r1)) {                                      \
+            if (jit_reg_is_const(r2))                                         \
+                _ret = st_r_to_base_imm_offset_imm(a, bytes_dst,              \
+                                                   JIT_REG_KIND_##kind,       \
+                                                   reg_no_src, base, offset); \
+            else                                                              \
+                _ret = st_r_to_base_imm_offset_r(                             \
+                    a, bytes_dst, JIT_REG_KIND_##kind, reg_no_src, base,      \
+                    reg_no_offset);                                           \
+        }                                                                     \
+        else if (jit_reg_is_const(r2))                                        \
+            _ret =                                                            \
+                st_r_to_base_r_offset_imm(a, bytes_dst, JIT_REG_KIND_##kind,  \
+                                          reg_no_src, reg_no_base, offset);   \
+        else                                                                  \
+            _ret = st_r_to_base_r_offset_r(a, bytes_dst, JIT_REG_KIND_##kind, \
+                                           reg_no_src, reg_no_base,           \
+                                           reg_no_offset);                    \
+        if (!_ret)                                                            \
+            GOTO_FAIL;                                                        \
+    } while (0)
+
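+/*
+ * ST_R_R_R mirrors LD_R_R_R but covers eight variants, since the stored
+ * value may itself be a constant: e.g. a constant i32 stored to a register
+ * base with a constant offset dispatches to
+ *   st_imm_to_base_r_offset_imm(a, 4, &data_src, reg_no_base, offset);
+ */
+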
+/**
+ * Encode insn mov: MOV r0, r1
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param Type the data type, such as int32, int64, float32, and float64
+ * @param type the abbreviation of data type, such as i32, i64, f32, and f64
+ */
+#define MOV_R_R(kind, Type, type)                                \
+    do {                                                         \
+        bool _ret = false;                                       \
+        int32 reg_no_dst = 0, reg_no_src = 0;                    \
+        CHECK_EQKIND(r0, r1);                                    \
+                                                                 \
+        CHECK_NCONST(r0);                                        \
+        reg_no_dst = jit_reg_no(r0);                             \
+        CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));              \
+                                                                 \
+        if (jit_reg_is_const(r1)) {                              \
+            Type data = jit_cc_get_const_##kind(cc, r1);         \
+            _ret = mov_imm_to_r_##type(a, reg_no_dst, data);     \
+        }                                                        \
+        else {                                                   \
+            reg_no_src = jit_reg_no(r1);                         \
+            CHECK_REG_NO(reg_no_src, jit_reg_kind(r1));          \
+            _ret = mov_r_to_r_##type(a, reg_no_dst, reg_no_src); \
+        }                                                        \
+        if (!_ret)                                               \
+            GOTO_FAIL;                                           \
+    } while (0)
+
+/**
+ * Encode mov insn, MOV r0, r1
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 src jit register that contains the src operand info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_mov(JitCompContext *cc, x86::Assembler &a, JitReg r0, JitReg r1)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            MOV_R_R(I32, int32, i32);
+            break;
+        case JIT_REG_KIND_I64:
+            MOV_R_R(I64, int64, i64);
+            break;
+        case JIT_REG_KIND_F32:
+            MOV_R_R(F32, float32, f32);
+            break;
+        case JIT_REG_KIND_F64:
+            MOV_R_R(F64, float64, f64);
+            break;
+        default:
+            LOG_VERBOSE("Invalid reg type of mov: %d\n", jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode insn neg: NEG r0, r1
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param Type the data type, such as int32, int64, float32, and float64
+ * @param type the abbreviation of data type, such as i32, i64, f32, and f64
+ */
+#define NEG_R_R(kind, Type, type)                                \
+    do {                                                         \
+        bool _ret = false;                                       \
+        int32 reg_no_dst = 0, reg_no_src = 0;                    \
+        CHECK_EQKIND(r0, r1);                                    \
+                                                                 \
+        CHECK_NCONST(r0);                                        \
+        reg_no_dst = jit_reg_no(r0);                             \
+        CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));              \
+                                                                 \
+        if (jit_reg_is_const(r1)) {                              \
+            Type data = jit_cc_get_const_##kind(cc, r1);         \
+            _ret = neg_imm_to_r_##type(a, reg_no_dst, data);     \
+        }                                                        \
+        else {                                                   \
+            reg_no_src = jit_reg_no(r1);                         \
+            CHECK_REG_NO(reg_no_src, jit_reg_kind(r1));          \
+            _ret = neg_r_to_r_##type(a, reg_no_dst, reg_no_src); \
+        }                                                        \
+        if (!_ret)                                               \
+            GOTO_FAIL;                                           \
+    } while (0)
+
+/**
+ * Encode neg insn, NEG r0, r1
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 src jit register that contains the src operand info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_neg(JitCompContext *cc, x86::Assembler &a, JitReg r0, JitReg r1)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            NEG_R_R(I32, int32, i32);
+            break;
+        case JIT_REG_KIND_I64:
+            NEG_R_R(I64, int64, i64);
+            break;
+        case JIT_REG_KIND_F32:
+            NEG_R_R(F32, float32, f32);
+            break;
+        case JIT_REG_KIND_F64:
+            NEG_R_R(F64, float64, f64);
+            break;
+        default:
+            LOG_VERBOSE("Invalid reg type of neg: %d\n", jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode insn convert: I32TOI8 r0, r1, or I32TOI16, I32TOF32, F32TOF64, etc.
+ * @param kind0 the dst JIT_REG_KIND, such as I32, I64, F32 and F64
+ * @param kind1 the src JIT_REG_KIND, such as I32, I64, F32 and F64
+ * @param type0 the dst data type, such as i8, u8, i16, u16, i32, i64, f32
+ * and f64
+ * @param type1 the src data type, such as i8, u8, i16, u16, i32, i64, f32
+ * and f64
+ * @param Type1 the C type of the src data, such as int32, int64, float32
+ * and float64
+ */
+#define CONVERT_R_R(kind0, kind1, type0, type1, Type1)                       \
+    do {                                                                     \
+        bool _ret = false;                                                   \
+        int32 reg_no_dst = 0, reg_no_src = 0;                                \
+        CHECK_KIND(r0, JIT_REG_KIND_##kind0);                                \
+        CHECK_KIND(r1, JIT_REG_KIND_##kind1);                                \
+                                                                             \
+        CHECK_NCONST(r0);                                                    \
+        reg_no_dst = jit_reg_no(r0);                                         \
+        CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));                          \
+                                                                             \
+        if (jit_reg_is_const(r1)) {                                          \
+            Type1 data = jit_cc_get_const_##kind1(cc, r1);                   \
+            _ret = convert_imm_##type1##_to_r_##type0(a, reg_no_dst, data);  \
+        }                                                                    \
+        else {                                                               \
+            reg_no_src = jit_reg_no(r1);                                     \
+            CHECK_REG_NO(reg_no_src, jit_reg_kind(r1));                      \
+            _ret =                                                           \
+                convert_r_##type1##_to_r_##type0(a, reg_no_dst, reg_no_src); \
+        }                                                                    \
+        if (!_ret)                                                           \
+            GOTO_FAIL;                                                       \
+    } while (0)
+
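+/*
+ * Example expansion: CONVERT_R_R(I32, F32, i32, f32, float32) lowers an
+ * f32-to-i32 conversion by calling convert_imm_f32_to_r_i32() when the
+ * source is a constant and convert_r_f32_to_r_i32() otherwise.
+ */
+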
+/**
+ * Encode insn alu: ADD/SUB/MUL/DIV/REM r0, r1, r2
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param Type the data type, such as int32, int64, float32, and float64
+ * @param type the abbreviation of data type, such as i32, i64, f32, and f64
+ * @param op the opcode of alu
+ */
+#define ALU_R_R_R(kind, Type, type, op)                                       \
+    do {                                                                      \
+        Type data1, data2;                                                    \
+        int32 reg_no_dst = 0, reg_no_src1 = 0, reg_no_src2 = 0;               \
+        bool _ret = false;                                                    \
+                                                                              \
+        CHECK_EQKIND(r0, r1);                                                 \
+        CHECK_EQKIND(r0, r2);                                                 \
+        memset(&data1, 0, sizeof(Type));                                      \
+        memset(&data2, 0, sizeof(Type));                                      \
+                                                                              \
+        reg_no_dst = jit_reg_no(r0);                                          \
+        CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));                           \
+        if (jit_reg_is_const(r1))                                             \
+            data1 = jit_cc_get_const_##kind(cc, r1);                          \
+        else {                                                                \
+            reg_no_src1 = jit_reg_no(r1);                                     \
+            CHECK_REG_NO(reg_no_src1, jit_reg_kind(r1));                      \
+        }                                                                     \
+        if (jit_reg_is_const(r2))                                             \
+            data2 = jit_cc_get_const_##kind(cc, r2);                          \
+        else {                                                                \
+            reg_no_src2 = jit_reg_no(r2);                                     \
+            CHECK_REG_NO(reg_no_src2, jit_reg_kind(r2));                      \
+        }                                                                     \
+                                                                              \
+        if (jit_reg_is_const(r1)) {                                           \
+            if (jit_reg_is_const(r2))                                         \
+                _ret =                                                        \
+                    alu_imm_imm_to_r_##type(a, op, reg_no_dst, data1, data2); \
+            else                                                              \
+                _ret = alu_imm_r_to_r_##type(a, op, reg_no_dst, data1,        \
+                                             reg_no_src2);                    \
+        }                                                                     \
+        else if (jit_reg_is_const(r2))                                        \
+            _ret =                                                            \
+                alu_r_imm_to_r_##type(a, op, reg_no_dst, reg_no_src1, data2); \
+        else                                                                  \
+            _ret = alu_r_r_to_r_##type(a, op, reg_no_dst, reg_no_src1,        \
+                                       reg_no_src2);                          \
+        if (!_ret)                                                            \
+            GOTO_FAIL;                                                        \
+    } while (0)
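+
+/* Illustrative expansion (a sketch, not generated code): for an i32 ALU
+   op where r1 is a constant and r2 is a register, ALU_R_R_R(I32, int32,
+   i32, op) reduces to a single helper call:
+       alu_imm_r_to_r_i32(a, op, reg_no_dst, data1, reg_no_src2);
+   The BIT_R_R_R and SHIFT_R_R_R macros below follow the same
+   const/register dispatch pattern. */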
+
+/**
+ * Encode alu insn, ADD/SUB/MUL/DIV/REM r0, r1, r2
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the opcode of alu operations
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 src jit register that contains the first src operand info
+ * @param r2 src jit register that contains the second src operand info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_alu(JitCompContext *cc, x86::Assembler &a, ALU_OP op, JitReg r0,
+          JitReg r1, JitReg r2)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            ALU_R_R_R(I32, int32, i32, op);
+            break;
+        case JIT_REG_KIND_I64:
+            ALU_R_R_R(I64, int64, i64, op);
+            break;
+        case JIT_REG_KIND_F32:
+            ALU_R_R_R(F32, float32, f32, op);
+            break;
+        case JIT_REG_KIND_F64:
+            ALU_R_R_R(F64, float64, f64, op);
+            break;
+        default:
+            LOG_VERBOSE("Invalid reg type of alu: %d\n", jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode insn bit: AND/OR/XOR r0, r1, r2
+ * @param kind the data kind, such as I32, I64
+ * @param Type the data type, such as int32, int64
+ * @param type the abbreviation of data type, such as i32, i64
+ * @param op the opcode of bit operation
+ */
+#define BIT_R_R_R(kind, Type, type, op)                                       \
+    do {                                                                      \
+        Type data1, data2;                                                    \
+        int32 reg_no_dst = 0, reg_no_src1 = 0, reg_no_src2 = 0;               \
+        bool _ret = false;                                                    \
+                                                                              \
+        CHECK_EQKIND(r0, r1);                                                 \
+        CHECK_EQKIND(r0, r2);                                                 \
+        memset(&data1, 0, sizeof(Type));                                      \
+        memset(&data2, 0, sizeof(Type));                                      \
+                                                                              \
+        reg_no_dst = jit_reg_no(r0);                                          \
+        CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));                           \
+        if (jit_reg_is_const(r1))                                             \
+            data1 = jit_cc_get_const_##kind(cc, r1);                          \
+        else {                                                                \
+            reg_no_src1 = jit_reg_no(r1);                                     \
+            CHECK_REG_NO(reg_no_src1, jit_reg_kind(r1));                      \
+        }                                                                     \
+        if (jit_reg_is_const(r2))                                             \
+            data2 = jit_cc_get_const_##kind(cc, r2);                          \
+        else {                                                                \
+            reg_no_src2 = jit_reg_no(r2);                                     \
+            CHECK_REG_NO(reg_no_src2, jit_reg_kind(r2));                      \
+        }                                                                     \
+                                                                              \
+        if (jit_reg_is_const(r1)) {                                           \
+            if (jit_reg_is_const(r2))                                         \
+                _ret =                                                        \
+                    bit_imm_imm_to_r_##type(a, op, reg_no_dst, data1, data2); \
+            else                                                              \
+                _ret = bit_imm_r_to_r_##type(a, op, reg_no_dst, data1,        \
+                                             reg_no_src2);                    \
+        }                                                                     \
+        else if (jit_reg_is_const(r2))                                        \
+            _ret =                                                            \
+                bit_r_imm_to_r_##type(a, op, reg_no_dst, reg_no_src1, data2); \
+        else                                                                  \
+            _ret = bit_r_r_to_r_##type(a, op, reg_no_dst, reg_no_src1,        \
+                                       reg_no_src2);                          \
+        if (!_ret)                                                            \
+            GOTO_FAIL;                                                        \
+    } while (0)
+
+/**
+ * Encode bit insn, AND/OR/XOR r0, r1, r2
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the opcode of bit operations
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 src jit register that contains the first src operand info
+ * @param r2 src jit register that contains the second src operand info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_bit(JitCompContext *cc, x86::Assembler &a, BIT_OP op, JitReg r0,
+          JitReg r1, JitReg r2)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            BIT_R_R_R(I32, int32, i32, op);
+            break;
+        case JIT_REG_KIND_I64:
+            BIT_R_R_R(I64, int64, i64, op);
+            break;
+        default:
+            LOG_VERBOSE("Invalid reg type of bit: %d\n", jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode insn shift: SHL/SHRS/SHRU r0, r1, r2
+ * @param kind the data kind, such as I32, I64
+ * @param Type the data type, such as int32, int64
+ * @param type the abbreviation of data type, such as i32, i64
+ * @param op the opcode of shift operation
+ */
+#define SHIFT_R_R_R(kind, Type, type, op)                                  \
+    do {                                                                   \
+        Type data1, data2;                                                 \
+        int32 reg_no_dst = 0, reg_no_src1 = 0, reg_no_src2 = 0;            \
+        bool _ret = false;                                                 \
+                                                                           \
+        CHECK_EQKIND(r0, r1);                                              \
+        CHECK_KIND(r2, JIT_REG_KIND_##kind);                               \
+        memset(&data1, 0, sizeof(Type));                                   \
+        memset(&data2, 0, sizeof(Type));                                   \
+                                                                           \
+        reg_no_dst = jit_reg_no(r0);                                       \
+        CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));                        \
+        if (jit_reg_is_const(r1))                                          \
+            data1 = jit_cc_get_const_##kind(cc, r1);                       \
+        else {                                                             \
+            reg_no_src1 = jit_reg_no(r1);                                  \
+            CHECK_REG_NO(reg_no_src1, jit_reg_kind(r1));                   \
+        }                                                                  \
+        if (jit_reg_is_const(r2))                                          \
+            data2 = jit_cc_get_const_##kind(cc, r2);                       \
+        else {                                                             \
+            reg_no_src2 = jit_reg_no(r2);                                  \
+            CHECK_REG_NO(reg_no_src2, jit_reg_kind(r2));                   \
+        }                                                                  \
+                                                                           \
+        if (jit_reg_is_const(r1)) {                                        \
+            if (jit_reg_is_const(r2))                                      \
+                _ret = shift_imm_imm_to_r_##type(a, op, reg_no_dst, data1, \
+                                                 data2);                   \
+            else                                                           \
+                _ret = shift_imm_r_to_r_##type(a, op, reg_no_dst, data1,   \
+                                               reg_no_src2);               \
+        }                                                                  \
+        else if (jit_reg_is_const(r2))                                     \
+            _ret = shift_r_imm_to_r_##type(a, op, reg_no_dst, reg_no_src1, \
+                                           data2);                         \
+        else                                                               \
+            _ret = shift_r_r_to_r_##type(a, op, reg_no_dst, reg_no_src1,   \
+                                         reg_no_src2);                     \
+        if (!_ret)                                                         \
+            GOTO_FAIL;                                                     \
+    } while (0)
+
+/**
+ * Encode shift insn, SHL/SHRS/SHRU r0, r1, r2
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the opcode of shift operations
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 src jit register that contains the first src operand info
+ * @param r2 src jit register that contains the second src operand info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_shift(JitCompContext *cc, x86::Assembler &a, SHIFT_OP op, JitReg r0,
+            JitReg r1, JitReg r2)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            SHIFT_R_R_R(I32, int32, i32, op);
+            break;
+        case JIT_REG_KIND_I64:
+            SHIFT_R_R_R(I64, int64, i64, op);
+            break;
+        default:
+            LOG_VERBOSE("Invalid reg type of shift: %d\n", jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode int32 bitcount operation of reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BITCOUNT operation
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bitcount_r_to_r_i32(x86::Assembler &a, BITCOUNT_OP op, int32 reg_no_dst,
+                    int32 reg_no_src)
+{
+    switch (op) {
+        case CLZ:
+            a.lzcnt(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+            break;
+        case CTZ:
+            a.tzcnt(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+            break;
+        case POPCNT:
+            a.popcnt(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+    return true;
+}
+
+/**
+ * Encode int64 bitcount operation of reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BITCOUNT operation
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bitcount_r_to_r_i64(x86::Assembler &a, BITCOUNT_OP op, int32 reg_no_dst,
+                    int32 reg_no_src)
+{
+    switch (op) {
+        case CLZ:
+            a.lzcnt(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+            break;
+        case CTZ:
+            a.tzcnt(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+            break;
+        case POPCNT:
+            a.popcnt(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+    return true;
+}
+
+/**
+ * Encode insn bitcount: CLZ/CTZ/POPCNT r0, r1
+ * @param kind the data kind, such as I32, I64
+ * @param Type the data type, such as int32, int64
+ * @param type the abbreviation of data type, such as i32, i64
+ * @param op the opcode of bit operation
+ */
+#define BITCOUNT_R_R(kind, Type, type, op)                          \
+    do {                                                            \
+        int32 reg_no_dst = 0, reg_no_src = 0;                       \
+                                                                    \
+        CHECK_EQKIND(r0, r1);                                       \
+        CHECK_NCONST(r0);                                           \
+        CHECK_NCONST(r1);                                           \
+                                                                    \
+        reg_no_dst = jit_reg_no(r0);                                \
+        CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));                 \
+        reg_no_src = jit_reg_no(r1);                                \
+        CHECK_REG_NO(reg_no_src, jit_reg_kind(r1));                 \
+        if (!bitcount_r_to_r_##type(a, op, reg_no_dst, reg_no_src)) \
+            GOTO_FAIL;                                              \
+    } while (0)
+
+/**
+ * Encode bitcount insn, CLZ/CTZ/POPCNT r0, r1
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the opcode of bitcount operations
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 src jit register that contains the src operand info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_bitcount(JitCompContext *cc, x86::Assembler &a, BITCOUNT_OP op, JitReg r0,
+               JitReg r1)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            BITCOUNT_R_R(I32, int32, i32, op);
+            break;
+        case JIT_REG_KIND_I64:
+            BITCOUNT_R_R(I64, int64, i64, op);
+            break;
+        default:
+            LOG_VERBOSE("Invalid reg type of bit: %d\n", jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode insn cmp: CMP r0, r1, r2
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param Type the data type, such as int32, int64, float32, and float64
+ * @param type the abbreviation of data type, such as i32, i64, f32, and f64
+ */
+#define CMP_R_R_R(kind, Type, type)                                           \
+    do {                                                                      \
+        Type data1, data2;                                                    \
+        int32 reg_no_dst = 0, reg_no_src1 = 0, reg_no_src2 = 0;               \
+        bool _ret = false;                                                    \
+                                                                              \
+        CHECK_KIND(r0, JIT_REG_KIND_I32);                                     \
+        CHECK_KIND(r1, JIT_REG_KIND_##kind);                                  \
+        CHECK_EQKIND(r1, r2);                                                 \
+        memset(&data1, 0, sizeof(Type));                                      \
+        memset(&data2, 0, sizeof(Type));                                      \
+                                                                              \
+        reg_no_dst = jit_reg_no(r0);                                          \
+        CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0));                           \
+        if (jit_reg_is_const(r1))                                             \
+            data1 = jit_cc_get_const_##kind(cc, r1);                          \
+        else {                                                                \
+            reg_no_src1 = jit_reg_no(r1);                                     \
+            CHECK_REG_NO(reg_no_src1, jit_reg_kind(r1));                      \
+        }                                                                     \
+        if (jit_reg_is_const(r2))                                             \
+            data2 = jit_cc_get_const_##kind(cc, r2);                          \
+        else {                                                                \
+            reg_no_src2 = jit_reg_no(r2);                                     \
+            CHECK_REG_NO(reg_no_src2, jit_reg_kind(r2));                      \
+        }                                                                     \
+                                                                              \
+        if (jit_reg_is_const(r1)) {                                           \
+            if (jit_reg_is_const(r2))                                         \
+                _ret = cmp_imm_imm_to_r_##type(a, reg_no_dst, data1, data2);  \
+            else                                                              \
+                _ret =                                                        \
+                    cmp_imm_r_to_r_##type(a, reg_no_dst, data1, reg_no_src2); \
+        }                                                                     \
+        else if (jit_reg_is_const(r2))                                        \
+            _ret = cmp_r_imm_to_r_##type(a, reg_no_dst, reg_no_src1, data2);  \
+        else                                                                  \
+            _ret =                                                            \
+                cmp_r_r_to_r_##type(a, reg_no_dst, reg_no_src1, reg_no_src2); \
+        if (!_ret)                                                            \
+            GOTO_FAIL;                                                        \
+    } while (0)
+
+/**
+ * Encode cmp insn, CMP r0, r1, r2
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 src jit register that contains the first src operand info
+ * @param r2 src jit register that contains the second src operand info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_cmp(JitCompContext *cc, x86::Assembler &a, JitReg r0, JitReg r1,
+          JitReg r2)
+{
+    switch (jit_reg_kind(r1)) {
+        case JIT_REG_KIND_I32:
+            CMP_R_R_R(I32, int32, i32);
+            cc->last_cmp_on_fp = false;
+            break;
+        case JIT_REG_KIND_I64:
+            CMP_R_R_R(I64, int64, i64);
+            cc->last_cmp_on_fp = false;
+            break;
+        case JIT_REG_KIND_F32:
+            CMP_R_R_R(F32, float32, f32);
+            cc->last_cmp_on_fp = true;
+            break;
+        case JIT_REG_KIND_F64:
+            CMP_R_R_R(F64, float64, f64);
+            cc->last_cmp_on_fp = true;
+            break;
+        default:
+            cc->last_cmp_on_fp = false;
+            LOG_VERBOSE("Invalid reg type of cmp: %d\n", jit_reg_kind(r1));
+            GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode checking the flags set by a previous cmp, and jmp to the relative
+ * address according to the condition opcode
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the condition opcode to jmp
+ * @param offset the relative offset to jmp to when the condition is met
+ *
+ * @return true if success, false if failed
+ */
+static bool
+cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, COND_OP op,
+                       int32 offset)
+{
+    Imm target(INT32_MAX);
+    char *stream;
+    bool fp_cmp = cc->last_cmp_on_fp;
+
+    bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES)));
+
+    switch (op) {
+        case EQ:
+        {
+            a.je(target);
+            break;
+        }
+        case NE:
+        {
+            a.jne(target);
+            break;
+        }
+        case GTS:
+        {
+            if (fp_cmp) {
+                a.ja(target);
+            }
+            else {
+                a.jg(target);
+            }
+            break;
+        }
+        case LES:
+        {
+            a.jng(target);
+            break;
+        }
+        case GES:
+        {
+            if (fp_cmp) {
+                a.jae(target);
+            }
+            else {
+                a.jnl(target);
+            }
+            break;
+        }
+        case LTS:
+        {
+            a.jl(target);
+            break;
+        }
+        case GTU:
+        {
+            a.ja(target);
+            break;
+        }
+        case LEU:
+        {
+            a.jna(target);
+            break;
+        }
+        case GEU:
+        {
+            a.jae(target);
+            break;
+        }
+        case LTU:
+        {
+            a.jb(target);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            break;
+        }
+    }
+
+    JitErrorHandler *err_handler = (JitErrorHandler *)a.code()->errorHandler();
+
+    if (!err_handler->err) {
+        /* The offset written by asmjit is always 0, we patch it again */
+        stream = (char *)a.code()->sectionById(0)->buffer().data()
+                 + a.code()->sectionById(0)->buffer().size() - 6;
+        *(int32 *)(stream + 2) = offset;
+    }
+    return true;
+}
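+
+/* Patching sketch: every branch above is a near conditional jump with a
+   32-bit displacement, encoded in 6 bytes as two opcode bytes (0F 8x)
+   followed by the rel32 field, so the section buffer ends with:
+       ... | 0F 8x | disp32 |
+                     ^-- stream + 2, rewritten with the real offset */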
+
+/**
+ * Encode select insn, SELECT r0, r1, r2, r3
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the condition opcode
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 condition jit register
+ * @param r2 src jit register that contains the first src operand info
+ * @param r3 src jit register that contains the second src operand info
+ *
+ * @return true if success, false if failed
+ */
+/* TODO: optimize with setcc */
+static bool
+lower_select(JitCompContext *cc, x86::Assembler &a, COND_OP op, JitReg r0,
+             JitReg r1, JitReg r2, JitReg r3)
+{
+    JitErrorHandler err_handler;
+    Environment env(Arch::kX64);
+    CodeHolder code1, code2;
+    char *stream_mov1, *stream_mov2;
+    uint32 size_mov1, size_mov2;
+
+    code1.init(env);
+    code1.setErrorHandler(&err_handler);
+    x86::Assembler a1(&code1);
+
+    code2.init(env);
+    code2.setErrorHandler(&err_handler);
+    x86::Assembler a2(&code2);
+
+    CHECK_NCONST(r0);
+    CHECK_NCONST(r1);
+    CHECK_KIND(r1, JIT_REG_KIND_I32);
+
+    if (r0 == r3 && r0 != r2 && !cc->last_cmp_on_fp) {
+        JitReg r_tmp;
+
+        /* For i32/i64, exchange r2 and r3 to make r0 equal to r2,
+           so as to reduce the number of instructions executed.
+           For f32/f64 comparisons, the order must not be changed as
+           the result of a comparison with NaN may differ. */
+        r_tmp = r2;
+        r2 = r3;
+        r3 = r_tmp;
+        op = not_cond(op);
+    }
+
+    if (!lower_mov(cc, a1, r0, r2))
+        GOTO_FAIL;
+
+    if (!lower_mov(cc, a2, r0, r3))
+        GOTO_FAIL;
+
+    stream_mov1 = (char *)a1.code()->sectionById(0)->buffer().data();
+    size_mov1 = a1.code()->sectionById(0)->buffer().size();
+    stream_mov2 = (char *)a2.code()->sectionById(0)->buffer().data();
+    size_mov2 = a2.code()->sectionById(0)->buffer().size();
+
+    if (r0 != r2) {
+        a.embedDataArray(TypeId::kInt8, stream_mov1, size_mov1);
+    }
+
+    if (r3 && r0 != r3) {
+        if (!cmp_r_and_jmp_relative(cc, a, op, (int32)size_mov2))
+            return false;
+        a.embedDataArray(TypeId::kInt8, stream_mov2, size_mov2);
+    }
+
+    return true;
+fail:
+    return false;
+}
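+
+/* Rough layout of the native code emitted for SELECT when r0 != r2 and
+   r0 != r3 (register names illustrative):
+       mov  r0, r2         ; copied from assembler a1's buffer
+       jcc  +size_mov2     ; skip the else-move when the condition holds
+       mov  r0, r3         ; copied from assembler a2's buffer
+   i.e. r0 keeps r2 when the condition op holds and is overwritten with
+   r3 otherwise. */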
+
+/* jmp to dst label */
+#define JMP_TO_LABEL(label_dst, label_src)                                 \
+    do {                                                                   \
+        if (label_is_ahead(cc, label_dst, label_src)) {                    \
+            JitErrorHandler *err_handler =                                 \
+                (JitErrorHandler *)a.code()->errorHandler();               \
+            int32 _offset;                                                 \
+            char *stream;                                                  \
+            Imm imm(INT32_MAX);                                            \
+            a.jmp(imm);                                                    \
+            if (!err_handler->err) {                                       \
+                /* The offset written by asmjit is always 0, we patch it   \
+                   again, 6 is the size of the jmp instruction */          \
+                stream = (char *)a.code()->sectionById(0)->buffer().data() \
+                         + a.code()->sectionById(0)->buffer().size() - 6;  \
+                _offset = label_offsets[label_dst]                         \
+                          - a.code()->sectionById(0)->buffer().size();     \
+                *(int32 *)(stream + 2) = _offset;                          \
+            }                                                              \
+        }                                                                  \
+        else {                                                             \
+            if (!jmp_from_label_to_label(a, jmp_info_list, label_dst,      \
+                                         label_src))                       \
+                GOTO_FAIL;                                                 \
+        }                                                                  \
+    } while (0)
+
+/**
+ * Encode branch insn, BEQ/BNE/../BLTU r0, r1, r2
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_src the index of src label
+ * @param op the condition opcode
+ * @param r0 the condition jit register
+ * @param r1 the first target label operand
+ * @param r2 the second target label operand
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_branch(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
+             int32 label_src, COND_OP op, JitReg r0, JitReg r1, JitReg r2,
+             bool is_last_insn)
+{
+    int32 label_dst;
+
+    CHECK_NCONST(r0);
+    CHECK_KIND(r0, JIT_REG_KIND_I32);
+    CHECK_KIND(r1, JIT_REG_KIND_L32);
+
+    CHECK_REG_NO(jit_reg_no(r0), jit_reg_kind(r0));
+
+    label_dst = jit_reg_no(r1);
+    if (label_dst < (int32)jit_cc_label_num(cc) - 1 && is_last_insn
+        && label_is_neighboring(cc, label_src, label_dst)
+        && !cc->last_cmp_on_fp) {
+        JitReg r_tmp;
+
+        r_tmp = r1;
+        r1 = r2;
+        r2 = r_tmp;
+        op = not_cond(op);
+    }
+
+    if (!cmp_r_and_jmp_label(cc, a, jmp_info_list, label_src, op, r1, r2,
+                             is_last_insn))
+        GOTO_FAIL;
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode lookupswitch with key of immediate data
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_offsets the offsets of each label
+ * @param label_src the index of src label
+ * @param key the entry key
+ * @param opnd the lookup switch operand
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lookupswitch_imm(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
+                 uint32 *label_offsets, int32 label_src, int32 key,
+                 const JitOpndLookupSwitch *opnd, bool is_last_insn)
+{
+    uint32 i;
+    int32 label_dst;
+
+    for (i = 0; i < opnd->match_pairs_num; i++)
+        if (key == opnd->match_pairs[i].value) {
+            label_dst = jit_reg_no(opnd->match_pairs[i].target);
+            if (!(is_last_insn
+                  && label_is_neighboring(cc, label_src, label_dst))) {
+                JMP_TO_LABEL(label_dst, label_src);
+            }
+            return true;
+        }
+
+    if (opnd->default_target) {
+        label_dst = jit_reg_no(opnd->default_target);
+        if (!(is_last_insn && label_is_neighboring(cc, label_src, label_dst))) {
+            JMP_TO_LABEL(label_dst, label_src);
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode detecting lookupswitch entry register and jumping to matched label
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_offsets the offsets of each label
+ * @param label_src the index of src label
+ * @param reg_no the no of entry register
+ * @param opnd the lookup switch operand
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lookupswitch_r(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
+               uint32 *label_offsets, int32 label_src, int32 reg_no,
+               const JitOpndLookupSwitch *opnd, bool is_last_insn)
+{
+    JmpInfo *node;
+    Imm imm;
+    x86::Mem m;
+    uint32 i;
+    int32 label_dst = 0;
+    char *stream;
+
+    if (opnd->match_pairs_num < 10) {
+        /* For a small count of branches, it is better to compare
+           the key with each branch value and jump one by one */
+        for (i = 0; i < opnd->match_pairs_num; i++) {
+            imm.setValue(opnd->match_pairs[i].value);
+            a.cmp(regs_i32[reg_no], imm);
+
+            node = (JmpInfo *)jit_malloc(sizeof(JmpInfo));
+            if (!node)
+                GOTO_FAIL;
+
+            node->type = JMP_DST_LABEL_REL;
+            node->label_src = label_src;
+            node->dst_info.label_dst = jit_reg_no(opnd->match_pairs[i].target);
+            node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+            bh_list_insert(jmp_info_list, node);
+
+            imm.setValue(INT32_MAX);
+            a.je(imm);
+        }
+
+        if (opnd->default_target) {
+            label_dst = jit_reg_no(opnd->default_target);
+            if (!(is_last_insn
+                  && label_is_neighboring(cc, label_src, label_dst)))
+                JMP_TO_LABEL(label_dst, label_src);
+        }
+    }
+    else {
+        /* For a bigger count of branches, use an indirect jump */
+        /* Unsigned extend the key to rsi */
+        a.mov(regs_i32[REG_I32_FREE_IDX], regs_i32[reg_no]);
+        imm.setValue(opnd->match_pairs_num);
+        a.cmp(regs_i64[REG_I64_FREE_IDX], imm);
+
+        /* Jump to default label if rsi >= br_count */
+        stream = (char *)a.code()->sectionById(0)->buffer().data()
+                 + a.code()->sectionById(0)->buffer().size();
+        imm.setValue(INT32_MAX);
+        a.jb(imm);
+        *(uint32 *)(stream + 2) = 6;
+
+        node = (JmpInfo *)jit_calloc(sizeof(JmpInfo));
+        if (!node)
+            goto fail;
+
+        node->type = JMP_DST_LABEL_REL;
+        node->label_src = label_src;
+        node->dst_info.label_dst = jit_reg_no(opnd->default_target);
+        node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+        bh_list_insert(jmp_info_list, node);
+
+        imm.setValue(INT32_MAX);
+        a.jmp(imm);
+
+        node = (JmpInfo *)jit_malloc(sizeof(JmpInfo));
+        if (!node)
+            GOTO_FAIL;
+
+        node->type = JMP_LOOKUPSWITCH_BASE;
+        node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+        bh_list_insert(jmp_info_list, node);
+
+        /* LookupSwitch table base addr */
+        imm.setValue(INT64_MAX);
+        a.mov(regs_i64[reg_no], imm);
+
+        /* jmp *(base_addr + rsi * 8) */
+        m = x86::ptr(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX], 3);
+        a.jmp(m);
+
+        /* Store each dst label absolute address */
+        for (i = 0; i < opnd->match_pairs_num; i++) {
+            node = (JmpInfo *)jit_malloc(sizeof(JmpInfo));
+            if (!node)
+                GOTO_FAIL;
+
+            node->type = JMP_DST_LABEL_ABS;
+            node->dst_info.label_dst = jit_reg_no(opnd->match_pairs[i].target);
+            node->offset = a.code()->sectionById(0)->buffer().size();
+            bh_list_insert(jmp_info_list, node);
+
+            a.embedUInt64(UINT64_MAX);
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
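+
+/* Rough layout of the indirect-jump path emitted above; register and
+   label names are illustrative (the key is zero-extended into the free
+   i64 register, described as rsi in the comments above):
+       mov  esi, key
+       cmp  rsi, match_pairs_num
+       jb   .in_range            ; patched to skip the default jmp
+       jmp  default_label        ; rel32 patched via JMP_DST_LABEL_REL
+   .in_range:
+       mov  rax, table_base      ; abs addr patched via JMP_LOOKUPSWITCH_BASE
+       jmp  qword ptr [rax + rsi*8]
+   table_base:
+       dq   label0_addr, label1_addr, ... ; patched via JMP_DST_LABEL_ABS */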
+
+/**
+ * Encode lookupswitch insn, LOOKUPSWITCH opnd
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_offsets the offsets of each label
+ * @param label_src the index of src label
+ * @param opnd the lookup switch operand
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_lookupswitch(JitCompContext *cc, x86::Assembler &a,
+                   bh_list *jmp_info_list, uint32 *label_offsets,
+                   int32 label_src, const JitOpndLookupSwitch *opnd,
+                   bool is_last_insn)
+{
+    JitReg r0 = opnd->value;
+    int32 key, reg_no;
+
+    CHECK_KIND(r0, JIT_REG_KIND_I32);
+    CHECK_KIND(opnd->default_target, JIT_REG_KIND_L32);
+
+    if (jit_reg_is_const(r0)) {
+        key = jit_cc_get_const_I32(cc, r0);
+        if (!lookupswitch_imm(cc, a, jmp_info_list, label_offsets, label_src,
+                              key, opnd, is_last_insn))
+            GOTO_FAIL;
+    }
+    else {
+        reg_no = jit_reg_no(r0);
+        CHECK_I32_REG_NO(reg_no);
+        if (!lookupswitch_r(cc, a, jmp_info_list, label_offsets, label_src,
+                            reg_no, opnd, is_last_insn))
+            GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode callnative insn, CALLNATIVE r0, r1, ...
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param insn current insn info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_callnative(JitCompContext *cc, x86::Assembler &a, JitInsn *insn)
+{
+    void (*func_ptr)(void);
+    JitReg ret_reg, func_reg, arg_reg;
+    /* indices in regs_i64 of the integer argument registers of the
+       x86-64 SysV calling convention (rdi, rsi, rdx, rcx, r8, r9) */
+    uint8 regs_arg_idx[] = { REG_RDI_IDX, REG_RSI_IDX, REG_RDX_IDX,
+                             REG_RCX_IDX, REG_R8_IDX,  REG_R9_IDX };
+    Imm imm;
+    uint32 i, opnd_num;
+    int32 integer_reg_index = 0, floatpoint_reg_index = 0;
+
+    ret_reg = *(jit_insn_opndv(insn, 0));
+    func_reg = *(jit_insn_opndv(insn, 1));
+    CHECK_KIND(func_reg, JIT_REG_KIND_I64);
+    CHECK_CONST(func_reg);
+
+    func_ptr = (void (*)(void))jit_cc_get_const_I64(cc, func_reg);
+
+    opnd_num = jit_insn_opndv_num(insn);
+    for (i = 0; i < opnd_num - 2; i++) {
+        /* TODO: handle the case when the number of arguments is greater
+           than 6 */
+        bh_assert(integer_reg_index < 6);
+        bh_assert(floatpoint_reg_index < 6);
+
+        arg_reg = *(jit_insn_opndv(insn, i + 2));
+        switch (jit_reg_kind(arg_reg)) {
+            case JIT_REG_KIND_I32:
+            {
+                int32 reg_no = regs_arg_idx[integer_reg_index++];
+                CHECK_I64_REG_NO(reg_no);
+                if (jit_reg_is_const(arg_reg)) {
+                    mov_imm_to_r_i64(a, reg_no,
+                                     (int64)jit_cc_get_const_I32(cc, arg_reg));
+                }
+                else {
+                    int32 arg_reg_no = jit_reg_no(arg_reg);
+                    CHECK_I32_REG_NO(arg_reg_no);
+                    extend_r32_to_r64(a, reg_no, arg_reg_no, true);
+                }
+                break;
+            }
+            case JIT_REG_KIND_I64:
+            {
+                int32 reg_no = regs_arg_idx[integer_reg_index++];
+                CHECK_I64_REG_NO(reg_no);
+                if (jit_reg_is_const(arg_reg)) {
+                    mov_imm_to_r_i64(a, reg_no,
+                                     jit_cc_get_const_I64(cc, arg_reg));
+                }
+                else {
+                    int32 arg_reg_no = jit_reg_no(arg_reg);
+                    CHECK_I64_REG_NO(arg_reg_no);
+                    mov_r_to_r_i64(a, reg_no, arg_reg_no);
+                }
+                break;
+            }
+            case JIT_REG_KIND_F32:
+            {
+                CHECK_F32_REG_NO((int32)floatpoint_reg_index);
+                if (jit_reg_is_const(arg_reg)) {
+                    mov_imm_to_r_f32(a, floatpoint_reg_index,
+                                     jit_cc_get_const_F32(cc, arg_reg));
+                }
+                else {
+                    int32 arg_reg_no = jit_reg_no(arg_reg);
+                    CHECK_F32_REG_NO(arg_reg_no);
+                    mov_r_to_r_f32(a, floatpoint_reg_index, arg_reg_no);
+                }
+                floatpoint_reg_index++;
+                break;
+            }
+            case JIT_REG_KIND_F64:
+            {
+                CHECK_F64_REG_NO((int32)floatpoint_reg_index);
+                if (jit_reg_is_const(arg_reg)) {
+                    mov_imm_to_r_f64(a, floatpoint_reg_index,
+                                     jit_cc_get_const_F64(cc, arg_reg));
+                }
+                else {
+                    int32 arg_reg_no = jit_reg_no(arg_reg);
+                    CHECK_F64_REG_NO(arg_reg_no);
+                    mov_r_to_r_f64(a, floatpoint_reg_index, arg_reg_no);
+                }
+                floatpoint_reg_index++;
+                break;
+            }
+            default:
+            {
+                bh_assert(0);
+                goto fail;
+            }
+        }
+    }
+
+    imm.setValue((uint64)func_ptr);
+    a.mov(regs_i64[REG_RAX_IDX], imm);
+    a.call(regs_i64[REG_RAX_IDX]);
+
+    if (ret_reg) {
+        bh_assert((jit_reg_kind(ret_reg) == JIT_REG_KIND_I32
+                   && jit_reg_no(ret_reg) == REG_EAX_IDX)
+                  || (jit_reg_kind(ret_reg) == JIT_REG_KIND_I64
+                      && jit_reg_no(ret_reg) == REG_RAX_IDX)
+                  || ((jit_reg_kind(ret_reg) == JIT_REG_KIND_F32
+                       || jit_reg_kind(ret_reg) == JIT_REG_KIND_F64)
+                      && jit_reg_no(ret_reg) == 0));
+    }
+
+    return true;
+fail:
+    return false;
+}
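+
+/* Argument mapping sketch for CALLNATIVE (illustrative): following the
+   x86-64 SysV convention used above, the i-th integer argument goes to
+   rdi/rsi/rdx/rcx/r8/r9 and the i-th floating-point argument to
+   xmm0..xmm5 (both capped at six here), e.g. a native
+   func(int32 a, float32 b, int64 c) receives a in rdi, b in xmm0 and
+   c in rsi, with the return value expected in eax/rax or xmm0. */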
+
+/**
+ * Encode callbc insn, CALLBC r0, r1, r2
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_src the index of src label
+ * @param insn current insn info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_callbc(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
+             int32 label_src, JitInsn *insn)
+{
+    JmpInfo *node;
+    Imm imm;
+    JitReg edx_hreg = jit_reg_new(JIT_REG_KIND_I32, REG_EDX_IDX);
+    JitReg rdx_hreg = jit_reg_new(JIT_REG_KIND_I64, REG_RDX_IDX);
+    JitReg xmm0_f32_hreg = jit_reg_new(JIT_REG_KIND_F32, 0);
+    JitReg xmm0_f64_hreg = jit_reg_new(JIT_REG_KIND_F64, 0);
+    JitReg ret_reg = *(jit_insn_opnd(insn, 0));
+    JitReg func_reg = *(jit_insn_opnd(insn, 2));
+    JitReg src_reg;
+    int32 func_reg_no;
+
+    /* Load return_jitted_addr from stack */
+    x86::Mem m(x86::rbp, cc->jitted_return_address_offset);
+
+    CHECK_KIND(func_reg, JIT_REG_KIND_I64);
+    func_reg_no = jit_reg_no(func_reg);
+    CHECK_I64_REG_NO(func_reg_no);
+
+    node = (JmpInfo *)jit_malloc(sizeof(JmpInfo));
+    if (!node)
+        GOTO_FAIL;
+
+    node->type = JMP_END_OF_CALLBC;
+    node->label_src = label_src;
+    node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+    bh_list_insert(jmp_info_list, node);
+
+    /* Set the next jitted addr to glue_ret_jited_addr; the placeholder
+       will be replaced with the actual offset after the code cache is
+       allocated */
+    imm.setValue(INT64_MAX);
+    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+    a.mov(m, regs_i64[REG_I64_FREE_IDX]);
+    a.jmp(regs_i64[func_reg_no]);
+
+    if (ret_reg) {
+        switch (jit_reg_kind(ret_reg)) {
+            case JIT_REG_KIND_I32:
+                src_reg = edx_hreg;
+                break;
+            case JIT_REG_KIND_I64:
+                src_reg = rdx_hreg;
+                break;
+            case JIT_REG_KIND_F32:
+                src_reg = xmm0_f32_hreg;
+                break;
+            case JIT_REG_KIND_F64:
+                src_reg = xmm0_f64_hreg;
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+
+        if (!lower_mov(cc, a, ret_reg, src_reg))
+            return false;
+    }
+    return true;
+fail:
+    return false;
+}
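+
+/* Call protocol sketch for CALLBC: the code above stores a patched
+   "return address" (the native code right after the jmp) into the frame
+   slot at rbp + jitted_return_address_offset, then jmp's to the callee;
+   the callee's RETURNBC (lower_returnbc below) jumps back through that
+   slot, leaving the return value in edx/rdx or xmm0 according to its
+   kind. */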
+
+static bool
+lower_returnbc(JitCompContext *cc, x86::Assembler &a, JitInsn *insn)
+{
+    JitReg edx_hreg = jit_reg_new(JIT_REG_KIND_I32, REG_EDX_IDX);
+    JitReg rdx_hreg = jit_reg_new(JIT_REG_KIND_I64, REG_RDX_IDX);
+    JitReg xmm0_f32_hreg = jit_reg_new(JIT_REG_KIND_F32, 0);
+    JitReg xmm0_f64_hreg = jit_reg_new(JIT_REG_KIND_F64, 0);
+    JitReg act_reg = *(jit_insn_opnd(insn, 0));
+    JitReg ret_reg = *(jit_insn_opnd(insn, 1));
+    JitReg dst_reg;
+    int32 act;
+
+    CHECK_CONST(act_reg);
+    CHECK_KIND(act_reg, JIT_REG_KIND_I32);
+
+    act = jit_cc_get_const_I32(cc, act_reg);
+
+    if (ret_reg) {
+        switch (jit_reg_kind(ret_reg)) {
+            case JIT_REG_KIND_I32:
+                dst_reg = edx_hreg;
+                break;
+            case JIT_REG_KIND_I64:
+                dst_reg = rdx_hreg;
+                break;
+            case JIT_REG_KIND_F32:
+                dst_reg = xmm0_f32_hreg;
+                break;
+            case JIT_REG_KIND_F64:
+                dst_reg = xmm0_f64_hreg;
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+        if (!lower_mov(cc, a, dst_reg, ret_reg))
+            return false;
+    }
+
+    {
+        /* eax = act */
+        Imm imm(act);
+        a.mov(x86::eax, imm);
+
+        x86::Mem m(x86::rbp, cc->jitted_return_address_offset);
+        a.jmp(m);
+    }
+    return true;
+fail:
+    return false;
+}
+
+static bool
+lower_return(JitCompContext *cc, x86::Assembler &a, JitInsn *insn)
+{
+    JitReg act_reg = *(jit_insn_opnd(insn, 0));
+    int32 act;
+
+    CHECK_CONST(act_reg);
+    CHECK_KIND(act_reg, JIT_REG_KIND_I32);
+
+    act = jit_cc_get_const_I32(cc, act_reg);
+    {
+        /* eax = act */
+        Imm imm(act);
+        a.mov(x86::eax, imm);
+
+        imm.setValue((uintptr_t)code_block_return_to_interp_from_jitted);
+        a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+        a.jmp(regs_i64[REG_I64_FREE_IDX]);
+    }
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Replace all the jmp address pre-saved when the code cache hasn't been
+ * allocated with actual address after code cache allocated
+ *
+ * @param cc the compiler context containing the allocated code cache info
+ * @param jmp_info_list the jmp info list
+ */
+static void
+patch_jmp_info_list(JitCompContext *cc, bh_list *jmp_info_list)
+{
+    JmpInfo *jmp_info, *jmp_info_next;
+    JitReg reg_dst;
+    char *stream;
+
+    jmp_info = (JmpInfo *)bh_list_first_elem(jmp_info_list);
+
+    while (jmp_info) {
+        jmp_info_next = (JmpInfo *)bh_list_elem_next(jmp_info);
+
+        stream = (char *)cc->jitted_addr_begin + jmp_info->offset;
+
+        if (jmp_info->type == JMP_DST_LABEL_REL) {
+            /* Jmp with relative address */
+            reg_dst =
+                jit_reg_new(JIT_REG_KIND_L32, jmp_info->dst_info.label_dst);
+            *(int32 *)stream =
+                (int32)((uintptr_t)*jit_annl_jitted_addr(cc, reg_dst)
+                        - (uintptr_t)stream)
+                - 4;
+        }
+        else if (jmp_info->type == JMP_DST_LABEL_ABS) {
+            /* Jmp with absolute address */
+            reg_dst =
+                jit_reg_new(JIT_REG_KIND_L32, jmp_info->dst_info.label_dst);
+            *(uintptr_t *)stream =
+                (uintptr_t)*jit_annl_jitted_addr(cc, reg_dst);
+        }
+        else if (jmp_info->type == JMP_END_OF_CALLBC) {
+            /* 7 is the total size of the mov and jmp instructions */
+            *(uintptr_t *)stream = (uintptr_t)stream + sizeof(uintptr_t) + 7;
+        }
+        else if (jmp_info->type == JMP_LOOKUPSWITCH_BASE) {
+            /* 11 is the size of the 8-byte addr plus the 3-byte jmp
+               instruction */
+            *(uintptr_t *)stream = (uintptr_t)stream + 11;
+        }
+
+        jmp_info = jmp_info_next;
+    }
+}
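+
+/* Relative-patch arithmetic used for JMP_DST_LABEL_REL above: the rel32
+   field is relative to the end of the displacement itself, hence the
+   extra -4. For example, for a target 100 bytes after the start of the
+   field:
+       *(int32 *)stream = (int32)(target - (uintptr_t)stream) - 4; // 96
+*/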
+
+/* Free the jmp info list */
+static void
+free_jmp_info_list(bh_list *jmp_info_list)
+{
+    void *cur_node = bh_list_first_elem(jmp_info_list);
+
+    while (cur_node) {
+        void *next_node = bh_list_elem_next(cur_node);
+
+        bh_list_remove(jmp_info_list, cur_node);
+        jit_free(cur_node);
+        cur_node = next_node;
+    }
+}
+
+/**
+ * Encode cast int32 immediate data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cast_imm_i32_to_r_f32(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    Imm imm(data);
+    a.mov(regs_i32[REG_I32_FREE_IDX], imm);
+    a.movd(regs_float[reg_no], regs_i32[REG_I32_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode cast int32 register data to float register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cast_r_i32_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.movd(regs_float[reg_no_dst], regs_i32[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode cast int64 immediate data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cast_imm_i64_to_r_f64(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    Imm imm(data);
+    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+    a.movq(regs_float[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode cast int64 register data to double register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cast_r_i64_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.movq(regs_float[reg_no_dst], regs_i64[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode cast float immediate data to int32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cast_imm_f32_to_r_i32(x86::Assembler &a, int32 reg_no, float data)
+{
+    cast_float_to_integer v = { .f = data };
+    return mov_imm_to_r_i32(a, reg_no, v.i);
+}
+
+/**
+ * Encode cast float register data to int32 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int32 register
+ * @param reg_no_src the no of src float register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cast_r_f32_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.movd(regs_i32[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode cast double immediate data to int64 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cast_imm_f64_to_r_i64(x86::Assembler &a, int32 reg_no, double data)
+{
+    cast_double_to_integer v = { .d = data };
+    return mov_imm_to_r_i64(a, reg_no, v.i);
+}
+
+/**
+ * Encode cast double register data to int64 register data
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src double register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+cast_r_f64_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    a.movq(regs_i64[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
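+
+/* Note: the casts above reinterpret bits via movd/movq; they do not
+   convert numeric values (value conversions are handled by the
+   convert_* helpers). E.g. the i32 constant 0x3F800000 cast to f32
+   yields 1.0f:
+       uint32 bits = 0x3F800000;
+       float32 f;
+       memcpy(&f, &bits, sizeof(f)); // f == 1.0f
+*/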
+
+/**
+ * Encode insn cast: F32CASTI32, etc.
+ * @param kind0 the dst JIT_REG_KIND, such as I32, I64, F32 and F64
+ * @param kind1 the src JIT_REG_KIND, such as I32, I64, F32 and F64
+ * @param type0 the dst data type, such as i32, i64, f32 and f64
+ * @param type1 the src data type, such as i32, i64, f32 and f64
+ * @param Type1 the C type of the src data, such as int32, int64, float32
+ * and float64
+ */
+#define CAST_R_R(kind0, kind1, type0, type1, Type1)                          \
+    do {                                                                     \
+        bool _ret = false;                                                   \
+        int32 reg_no_dst = 0, reg_no_src = 0;                                \
+        CHECK_KIND(r0, JIT_REG_KIND_##kind0);                                \
+        CHECK_KIND(r1, JIT_REG_KIND_##kind1);                                \
+                                                                             \
+        reg_no_dst = jit_reg_no(r0);                                         \
+        CHECK_REG_NO(reg_no_dst, JIT_REG_KIND_##kind0);                      \
+        if (jit_reg_is_const(r1)) {                                          \
+            Type1 data = jit_cc_get_const_##kind1(cc, r1);                   \
+            _ret = cast_imm_##type1##_to_r_##type0(a, reg_no_dst, data);     \
+        }                                                                    \
+        else {                                                               \
+            reg_no_src = jit_reg_no(r1);                                     \
+            CHECK_REG_NO(reg_no_src, JIT_REG_KIND_##kind1);                  \
+            _ret = cast_r_##type1##_to_r_##type0(a, reg_no_dst, reg_no_src); \
+        }                                                                    \
+        if (!_ret)                                                           \
+            GOTO_FAIL;                                                       \
+    } while (0)
+
+bool
+jit_codegen_gen_native(JitCompContext *cc)
+{
+    JitBasicBlock *block;
+    JitInsn *insn;
+    JitReg r0, r1, r2, r3;
+    JmpInfo jmp_info_head;
+    bh_list *jmp_info_list = (bh_list *)&jmp_info_head;
+    uint32 label_index, label_num, i;
+    uint32 *label_offsets = NULL, code_size;
+#if CODEGEN_DUMP != 0
+    uint32 code_offset = 0;
+#endif
+    bool return_value = false, is_last_insn;
+    void **jitted_addr;
+    char *code_buf, *stream;
+
+    JitErrorHandler err_handler;
+    Environment env(Arch::kX64);
+    CodeHolder code;
+    code.init(env);
+    code.setErrorHandler(&err_handler);
+    x86::Assembler a(&code);
+
+    if (BH_LIST_SUCCESS != bh_list_init(jmp_info_list)) {
+        jit_set_last_error(cc, "init jmp info list failed");
+        return false;
+    }
+
+    label_num = jit_cc_label_num(cc);
+
+    if (!(label_offsets =
+              (uint32 *)jit_calloc(((uint32)sizeof(uint32)) * label_num))) {
+        jit_set_last_error(cc, "allocate memory failed");
+        goto fail;
+    }
+
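+    /* Emit the entry block (label 0) first and the exit block (label 1)
+       last; the remaining blocks (labels 2..label_num-1) keep their
+       original order */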
+    for (i = 0; i < label_num; i++) {
+        if (i == 0)
+            label_index = 0;
+        else if (i == label_num - 1)
+            label_index = 1;
+        else
+            label_index = i + 1;
+
+        label_offsets[label_index] = code.sectionById(0)->buffer().size();
+
+        block = *jit_annl_basic_block(
+            cc, jit_reg_new(JIT_REG_KIND_L32, label_index));
+
+#if CODEGEN_DUMP != 0
+        os_printf("\nL%d:\n\n", label_index);
+#endif
+
+        JIT_FOREACH_INSN(block, insn)
+        {
+            is_last_insn = (insn->next == block) ? true : false;
+
+#if CODEGEN_DUMP != 0
+            os_printf("\n");
+            jit_dump_insn(cc, insn);
+#endif
+            switch (insn->opcode) {
+                case JIT_OP_MOV:
+                    LOAD_2ARGS();
+                    if (!lower_mov(cc, a, r0, r1))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_I8TOI32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, I32, i32, i8, int8);
+                    break;
+
+                case JIT_OP_I8TOI64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I64, I32, i64, i8, int8);
+                    break;
+
+                case JIT_OP_I16TOI32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, I32, i32, i16, int16);
+                    break;
+
+                case JIT_OP_I16TOI64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I64, I32, i64, i16, int16);
+                    break;
+
+                case JIT_OP_I32TOI8:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, I32, i8, i32, int32);
+                    break;
+
+                case JIT_OP_I32TOU8:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, I32, u8, i32, int32);
+                    break;
+
+                case JIT_OP_I32TOI16:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, I32, i16, i32, int32);
+                    break;
+
+                case JIT_OP_I32TOU16:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, I32, u16, i32, int32);
+                    break;
+
+                case JIT_OP_I32TOI64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I64, I32, i64, i32, int32);
+                    break;
+
+                case JIT_OP_U32TOI64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I64, I32, i64, u32, int32);
+                    break;
+
+                case JIT_OP_I32TOF32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(F32, I32, f32, i32, int32);
+                    break;
+
+                case JIT_OP_U32TOF32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(F32, I32, f32, u32, uint32);
+                    break;
+
+                case JIT_OP_I32TOF64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(F64, I32, f64, i32, int32);
+                    break;
+
+                case JIT_OP_U32TOF64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(F64, I32, f64, u32, uint32);
+                    break;
+
+                case JIT_OP_I64TOI8:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, I64, i8, i64, int64);
+                    break;
+
+                case JIT_OP_I64TOI16:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, I64, i16, i64, int64);
+                    break;
+
+                case JIT_OP_I64TOI32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, I64, i32, i64, int64);
+                    break;
+
+                case JIT_OP_I64TOF32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(F32, I64, f32, i64, int64);
+                    break;
+
+                case JIT_OP_I64TOF64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(F64, I64, f64, i64, int64);
+                    break;
+
+                case JIT_OP_F32TOI32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, F32, i32, f32, float32);
+                    break;
+
+                case JIT_OP_F32TOI64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I64, F32, i64, f32, float32);
+                    break;
+
+                case JIT_OP_F32TOF64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(F64, F32, f64, f32, float32);
+                    break;
+
+                case JIT_OP_F32TOU32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, F32, u32, f32, float32);
+                    break;
+
+                case JIT_OP_F64TOI32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, F64, i32, f64, float64);
+                    break;
+
+                case JIT_OP_F64TOI64:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I64, F64, i64, f64, float64);
+                    break;
+
+                case JIT_OP_F64TOF32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(F32, F64, f32, f64, float64);
+                    break;
+
+                case JIT_OP_F64TOU32:
+                    LOAD_2ARGS();
+                    CONVERT_R_R(I32, F64, u32, f64, float64);
+                    break;
+
+                case JIT_OP_NEG:
+                    LOAD_2ARGS();
+                    if (!lower_neg(cc, a, r0, r1))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_ADD:
+                case JIT_OP_SUB:
+                case JIT_OP_MUL:
+                case JIT_OP_DIV_S:
+                case JIT_OP_REM_S:
+                case JIT_OP_DIV_U:
+                case JIT_OP_REM_U:
+                    LOAD_3ARGS();
+                    if (!lower_alu(cc, a,
+                                   (ALU_OP)(ADD + (insn->opcode - JIT_OP_ADD)),
+                                   r0, r1, r2))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_SHL:
+                case JIT_OP_SHRS:
+                case JIT_OP_SHRU:
+                case JIT_OP_ROTL:
+                case JIT_OP_ROTR:
+                    LOAD_3ARGS();
+                    if (!lower_shift(
+                            cc, a,
+                            (SHIFT_OP)(SHL + (insn->opcode - JIT_OP_SHL)), r0,
+                            r1, r2))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_OR:
+                case JIT_OP_XOR:
+                case JIT_OP_AND:
+                    LOAD_3ARGS();
+                    if (!lower_bit(cc, a,
+                                   (BIT_OP)(OR + (insn->opcode - JIT_OP_OR)),
+                                   r0, r1, r2))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_CLZ:
+                case JIT_OP_CTZ:
+                case JIT_OP_POPCNT:
+                    LOAD_2ARGS();
+                    if (!lower_bitcount(
+                            cc, a,
+                            (BITCOUNT_OP)(CLZ + (insn->opcode - JIT_OP_CLZ)),
+                            r0, r1))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_CMP:
+                    LOAD_3ARGS();
+                    if (!lower_cmp(cc, a, r0, r1, r2))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_SELECTEQ:
+                case JIT_OP_SELECTNE:
+                case JIT_OP_SELECTGTS:
+                case JIT_OP_SELECTGES:
+                case JIT_OP_SELECTLTS:
+                case JIT_OP_SELECTLES:
+                case JIT_OP_SELECTGTU:
+                case JIT_OP_SELECTGEU:
+                case JIT_OP_SELECTLTU:
+                case JIT_OP_SELECTLEU:
+                    LOAD_4ARGS();
+                    if (!lower_select(
+                            cc, a,
+                            (COND_OP)(EQ + (insn->opcode - JIT_OP_SELECTEQ)),
+                            r0, r1, r2, r3))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_LDEXECENV:
+                    LOAD_1ARG();
+                    CHECK_KIND(r0, JIT_REG_KIND_I32);
+                    /* TODO */
+                    break;
+
+                case JIT_OP_LDJITINFO:
+                    LOAD_1ARG();
+                    CHECK_KIND(r0, JIT_REG_KIND_I32);
+                    /* TODO */
+                    break;
+
+                case JIT_OP_LDI8:
+                    LOAD_3ARGS();
+                    bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+                              || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+                    if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+                        LD_R_R_R(I32, 1, true);
+                    else
+                        LD_R_R_R(I64, 1, true);
+                    break;
+
+                case JIT_OP_LDU8:
+                    LOAD_3ARGS();
+                    bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+                              || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+                    if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+                        LD_R_R_R(I32, 1, false);
+                    else
+                        LD_R_R_R(I64, 1, false);
+                    break;
+
+                case JIT_OP_LDI16:
+                    LOAD_3ARGS();
+                    bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+                              || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+                    if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+                        LD_R_R_R(I32, 2, true);
+                    else
+                        LD_R_R_R(I64, 2, true);
+                    break;
+
+                case JIT_OP_LDU16:
+                    LOAD_3ARGS();
+                    bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+                              || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+                    if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+                        LD_R_R_R(I32, 2, false);
+                    else
+                        LD_R_R_R(I64, 2, false);
+                    break;
+
+                case JIT_OP_LDI32:
+                    LOAD_3ARGS();
+                    bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+                              || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+                    if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+                        LD_R_R_R(I32, 4, true);
+                    else
+                        LD_R_R_R(I64, 4, true);
+                    break;
+
+                case JIT_OP_LDU32:
+                    LOAD_3ARGS();
+                    bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+                              || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+                    if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+                        LD_R_R_R(I32, 4, false);
+                    else
+                        LD_R_R_R(I64, 4, false);
+                    break;
+
+                case JIT_OP_LDI64:
+                case JIT_OP_LDU64:
+                case JIT_OP_LDPTR:
+                    LOAD_3ARGS();
+                    LD_R_R_R(I64, 8, false);
+                    break;
+
+                case JIT_OP_LDF32:
+                    LOAD_3ARGS();
+                    LD_R_R_R(F32, 4, false);
+                    break;
+
+                case JIT_OP_LDF64:
+                    LOAD_3ARGS();
+                    LD_R_R_R(F64, 8, false);
+                    break;
+
+                case JIT_OP_STI8:
+                    LOAD_3ARGS_NO_ASSIGN();
+                    ST_R_R_R(I32, int32, 1);
+                    break;
+
+                case JIT_OP_STI16:
+                    LOAD_3ARGS_NO_ASSIGN();
+                    ST_R_R_R(I32, int32, 2);
+                    break;
+
+                case JIT_OP_STI32:
+                    LOAD_3ARGS_NO_ASSIGN();
+                    ST_R_R_R(I32, int32, 4);
+                    break;
+
+                case JIT_OP_STI64:
+                case JIT_OP_STPTR:
+                    LOAD_3ARGS_NO_ASSIGN();
+                    ST_R_R_R(I64, int64, 8);
+                    break;
+
+                case JIT_OP_STF32:
+                    LOAD_3ARGS_NO_ASSIGN();
+                    ST_R_R_R(F32, float32, 4);
+                    break;
+
+                case JIT_OP_STF64:
+                    LOAD_3ARGS_NO_ASSIGN();
+                    ST_R_R_R(F64, float64, 8);
+                    break;
+
+                case JIT_OP_JMP:
+                    LOAD_1ARG();
+                    CHECK_KIND(r0, JIT_REG_KIND_L32);
+                    if (!(is_last_insn
+                          && label_is_neighboring(cc, label_index,
+                                                  jit_reg_no(r0))))
+                        JMP_TO_LABEL(jit_reg_no(r0), label_index);
+                    break;
+
+                case JIT_OP_BEQ:
+                case JIT_OP_BNE:
+                case JIT_OP_BGTS:
+                case JIT_OP_BGES:
+                case JIT_OP_BLTS:
+                case JIT_OP_BLES:
+                case JIT_OP_BGTU:
+                case JIT_OP_BGEU:
+                case JIT_OP_BLTU:
+                case JIT_OP_BLEU:
+                    LOAD_3ARGS();
+                    if (!lower_branch(
+                            cc, a, jmp_info_list, label_index,
+                            (COND_OP)(EQ + (insn->opcode - JIT_OP_BEQ)), r0, r1,
+                            r2, is_last_insn))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_LOOKUPSWITCH:
+                {
+                    JitOpndLookupSwitch *opnd = jit_insn_opndls(insn);
+                    if (!lower_lookupswitch(cc, a, jmp_info_list, label_offsets,
+                                            label_index, opnd, is_last_insn))
+                        GOTO_FAIL;
+                    break;
+                }
+
+                case JIT_OP_CALLNATIVE:
+                    if (!lower_callnative(cc, a, insn))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_CALLBC:
+                    if (!lower_callbc(cc, a, jmp_info_list, label_index, insn))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_RETURNBC:
+                    if (!lower_returnbc(cc, a, insn))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_RETURN:
+                    if (!lower_return(cc, a, insn))
+                        GOTO_FAIL;
+                    break;
+
+                case JIT_OP_I32CASTF32:
+                    LOAD_2ARGS();
+                    CAST_R_R(F32, I32, f32, i32, int32);
+                    break;
+
+                case JIT_OP_I64CASTF64:
+                    LOAD_2ARGS();
+                    CAST_R_R(F64, I64, f64, i64, int64);
+                    break;
+
+                case JIT_OP_F32CASTI32:
+                    LOAD_2ARGS();
+                    CAST_R_R(I32, F32, i32, f32, float);
+                    break;
+
+                case JIT_OP_F64CASTI64:
+                    LOAD_2ARGS();
+                    CAST_R_R(I64, F64, i64, f64, double);
+                    break;
+
+                default:
+                    jit_set_last_error_v(cc, "unsupported JIT opcode 0x%2x",
+                                         insn->opcode);
+                    GOTO_FAIL;
+            }
+
+            if (err_handler.err) {
+                jit_set_last_error_v(cc,
+                                     "failed to generate native code for JIT "
+                                     "opcode 0x%02x, ErrorCode is %u",
+                                     insn->opcode, err_handler.err);
+                GOTO_FAIL;
+            }
+
+#if CODEGEN_DUMP != 0
+            dump_native((char *)code.sectionById(0)->buffer().data()
+                            + code_offset,
+                        code.sectionById(0)->buffer().size() - code_offset);
+            code_offset = code.sectionById(0)->buffer().size();
+#endif
+        }
+    }
+
+    code_buf = (char *)code.sectionById(0)->buffer().data();
+    code_size = code.sectionById(0)->buffer().size();
+    if (!(stream = (char *)jit_code_cache_alloc(code_size))) {
+        jit_set_last_error(cc, "allocate memory failed");
+        goto fail;
+    }
+
+    bh_memcpy_s(stream, code_size, code_buf, code_size);
+    cc->jitted_addr_begin = stream;
+    cc->jitted_addr_end = stream + code_size;
+
+    for (i = 0; i < label_num; i++) {
+        if (i == 0)
+            label_index = 0;
+        else if (i == label_num - 1)
+            label_index = 1;
+        else
+            label_index = i + 1;
+
+        jitted_addr = jit_annl_jitted_addr(
+            cc, jit_reg_new(JIT_REG_KIND_L32, label_index));
+        *jitted_addr = stream + label_offsets[label_index];
+    }
+
+    patch_jmp_info_list(cc, jmp_info_list);
+    return_value = true;
+
+fail:
+
+    jit_free(label_offsets);
+    free_jmp_info_list(jmp_info_list);
+    return return_value;
+}
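+
+/*
+ * Illustrative note on the label layout used above: label 0 is the entry
+ * basic block and label 1 is the exit basic block, which is emitted last,
+ * so the emission loop maps iteration i to label_index as follows:
+ *
+ *   i == 0             -> label_index 0  (entry, emitted first)
+ *   i == label_num - 1 -> label_index 1  (exit, emitted last)
+ *   otherwise          -> label_index i + 1
+ *
+ * After the native code is copied into the code cache, each label's
+ * jitted address is simply stream + label_offsets[label_index], and
+ * patch_jmp_info_list() rewrites the recorded jump sites to these final
+ * addresses.
+ */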
+
+bool
+jit_codegen_lower(JitCompContext *cc)
+{
+    (void)cc;
+    return true;
+}
+
+void
+jit_codegen_free_native(JitCompContext *cc)
+{
+    (void)cc;
+}
+
+void
+jit_codegen_dump_native(void *begin_addr, void *end_addr)
+{
+#if WASM_ENABLE_FAST_JIT_DUMP != 0
+    os_printf("\n");
+    dump_native((char *)begin_addr, (char *)end_addr - (char *)begin_addr);
+    os_printf("\n");
+#else
+    (void)begin_addr;
+    (void)end_addr;
+#endif
+}
+
+bool
+jit_codegen_init()
+{
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    JitGlobals *jit_globals = jit_compiler_get_jit_globals();
+    char *code_buf, *stream;
+    uint32 code_size;
+
+    JitErrorHandler err_handler;
+    Environment env(Arch::kX64);
+    CodeHolder code;
+    code.init(env);
+    code.setErrorHandler(&err_handler);
+    x86::Assembler a(&code);
+
+    /* push callee-save registers */
+    a.push(x86::rbp);
+    a.push(x86::rbx);
+    a.push(x86::r12);
+    a.push(x86::r13);
+    a.push(x86::r14);
+    a.push(x86::r15);
+    /* push info */
+    a.push(x86::rsi);
+
+    /* Note: the number of registers pushed must be odd: the stack pointer
+       %rsp must be aligned to a 16-byte boundary before making a call,
+       and when a function (including this one) gets control, %rsp is not
+       aligned. Pushing an odd number of registers here restores the
+       alignment expected before calling native functions. */
+
+    /* exec_env_reg = exec_env */
+    a.mov(regs_i64[hreg_info->exec_env_hreg_index], x86::rdi);
+    /* fp_reg = info->frame */
+    a.mov(x86::rbp, x86::ptr(x86::rsi, 0));
+    /* jmp target */
+    a.jmp(x86::rdx);
+
+    if (err_handler.err)
+        return false;
+
+    code_buf = (char *)code.sectionById(0)->buffer().data();
+    code_size = code.sectionById(0)->buffer().size();
+    stream = (char *)jit_code_cache_alloc(code_size);
+    if (!stream)
+        return false;
+
+    bh_memcpy_s(stream, code_size, code_buf, code_size);
+    code_block_switch_to_jitted_from_interp = stream;
+
+#if 0
+    dump_native(stream, code_size);
+#endif
+
+    a.setOffset(0);
+
+    /* TODO: mask floating-point exception */
+    /* TODO: floating-point parameters */
+
+    /* pop info */
+    a.pop(x86::rsi);
+    /* info->frame = fp_reg */
+    {
+        x86::Mem m(x86::rsi, 0);
+        a.mov(m, x86::rbp);
+    }
+    /* info->out.ret.ival[0, 1] = rdx */
+    {
+        x86::Mem m(x86::rsi, 8);
+        a.mov(m, x86::rdx);
+    }
+    /* info->out.ret.fval[0, 1] = xmm0 */
+    {
+        x86::Mem m(x86::rsi, 16);
+        a.movsd(m, x86::xmm0);
+    }
+
+    /* pop callee-save registers */
+    a.pop(x86::r15);
+    a.pop(x86::r14);
+    a.pop(x86::r13);
+    a.pop(x86::r12);
+    a.pop(x86::rbx);
+    a.pop(x86::rbp);
+    a.ret();
+
+    if (err_handler.err)
+        goto fail1;
+
+    code_buf = (char *)code.sectionById(0)->buffer().data();
+    code_size = code.sectionById(0)->buffer().size();
+    stream = (char *)jit_code_cache_alloc(code_size);
+    if (!stream)
+        goto fail1;
+
+    bh_memcpy_s(stream, code_size, code_buf, code_size);
+    code_block_return_to_interp_from_jitted = stream;
+
+    jit_globals->return_to_interp_from_jitted =
+        code_block_return_to_interp_from_jitted;
+    return true;
+
+fail1:
+    jit_code_cache_free(code_block_switch_to_jitted_from_interp);
+    return false;
+}
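+
+/*
+ * A minimal sketch of how the two stubs above are used, assuming the
+ * System V argument order implied by the prologue (rdi = exec_env,
+ * rsi = info, rdx = jump target); the function-pointer type below is
+ * illustrative, not a declared API:
+ *
+ *   typedef void (*SwitchStubFn)(WASMExecEnv *exec_env, void *info,
+ *                                void *jitted_target);
+ *   ((SwitchStubFn)code_block_switch_to_jitted_from_interp)(
+ *       exec_env, info, jitted_code);
+ *
+ * The switch stub saves callee-saved registers and jumps into jitted
+ * code; the return stub restores them and stores the frame pointer and
+ * return values back through info (offsets 0, 8 and 16 above).
+ */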
+
+void
+jit_codegen_destroy()
+{
+    jit_code_cache_free(code_block_switch_to_jitted_from_interp);
+    jit_code_cache_free(code_block_return_to_interp_from_jitted);
+}
+
+/* clang-format off */
+static const uint8 hreg_info_I32[3][7] = {
+    /* ebp, eax, ebx, ecx, edx, edi, esi */
+    { 1, 0, 0, 0, 0, 0, 1 }, /* fixed, esi is freely used */
+    { 0, 1, 0, 1, 1, 1, 0 }, /* caller_saved_native */
+    { 0, 1, 1, 1, 1, 1, 0 }  /* caller_saved_jitted */
+};
+
+static const uint8 hreg_info_I64[3][16] = {
+    /* rbp, rax, rbx, rcx, rdx, rdi, rsi, rsp,
+       r8,  r9,  r10, r11, r12, r13, r14, r15 */
+    { 1, 1, 1, 1, 1, 1, 1, 1,
+      0, 0, 0, 0, 0, 0, 0, 1 }, /* fixed, rsi is freely used */
+    { 0, 1, 0, 1, 1, 1, 0, 0,
+      1, 1, 1, 1, 0, 0, 0, 0 }, /* caller_saved_native */
+    { 0, 1, 1, 1, 1, 1, 0, 0,
+      1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
+};
+
+/* System V AMD64 ABI Calling Convention. [XYZ]MM0-7 */
+static uint8 hreg_info_F32[3][16] = {
+    /* xmm0 ~ xmm15 */
+    { 0, 0, 0, 0, 0, 0, 0, 0,
+      1, 1, 1, 1, 1, 1, 1, 1 },
+    { 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */
+    { 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
+};
+
+/* System V AMD64 ABI Calling Convention. [XYZ]MM0-7 */
+static uint8 hreg_info_F64[3][16] = {
+    /* xmm0 ~ xmm15 */
+    { 1, 1, 1, 1, 1, 1, 1, 1,
+      0, 0, 0, 0, 0, 0, 0, 1 },
+    { 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */
+    { 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
+};
+
+static const JitHardRegInfo hreg_info = {
+    {
+        { 0, NULL, NULL, NULL }, /* VOID */
+
+        { sizeof(hreg_info_I32[0]), /* I32 */
+          hreg_info_I32[0],
+          hreg_info_I32[1],
+          hreg_info_I32[2] },
+
+        { sizeof(hreg_info_I64[0]), /* I64 */
+          hreg_info_I64[0],
+          hreg_info_I64[1],
+          hreg_info_I64[2] },
+
+        { sizeof(hreg_info_F32[0]), /* F32 */
+          hreg_info_F32[0],
+          hreg_info_F32[1],
+          hreg_info_F32[2] },
+
+        { sizeof(hreg_info_F64[0]), /* F64 */
+          hreg_info_F64[0],
+          hreg_info_F64[1],
+          hreg_info_F64[2] },
+
+        { 0, NULL, NULL, NULL }, /* V8 */
+        { 0, NULL, NULL, NULL }, /* V16 */
+        { 0, NULL, NULL, NULL }  /* V32 */
+    },
+    /* frame pointer hreg index: rbp */
+    0,
+    /* exec_env hreg index: r15 */
+    15,
+    /* cmp hreg index: esi */
+    6
+};
+/* clang-format on */
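+
+/*
+ * Reading the tables above: for each register kind, row 0 marks registers
+ * that are fixed (not available to the register allocator), row 1 marks
+ * registers clobbered by calls to native functions, and row 2 marks
+ * registers clobbered by calls to jitted functions. For I64, for example,
+ * row 0 fixes the first eight GPRs plus r15, leaving r8-r14 allocatable,
+ * which matches the rbp/r15/esi roles declared at the end of hreg_info.
+ */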
+
+const JitHardRegInfo *
+jit_codegen_get_hreg_info()
+{
+    return &hreg_info;
+}
+
+static const char *reg_names_i32[] = {
+    "ebp", "eax", "ebx", "ecx", "edx", "edi", "esi", "esp",
+};
+
+static const char *reg_names_i64[] = {
+    "rbp", "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "rsp",
+    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+};
+
+static const char *reg_names_f32[] = { "xmm0",  "xmm1",  "xmm2",  "xmm3",
+                                       "xmm4",  "xmm5",  "xmm6",  "xmm7",
+                                       "xmm8",  "xmm9",  "xmm10", "xmm11",
+                                       "xmm12", "xmm13", "xmm14", "xmm15" };
+
+static const char *reg_names_f64[] = {
+    "xmm0_f64",  "xmm1_f64",  "xmm2_f64",  "xmm3_f64", "xmm4_f64",  "xmm5_f64",
+    "xmm6_f64",  "xmm7_f64",  "xmm8_f64",  "xmm9_f64", "xmm10_f64", "xmm11_f64",
+    "xmm12_f64", "xmm13_f64", "xmm14_f64", "xmm15_f64"
+};
+
+JitReg
+jit_codegen_get_hreg_by_name(const char *name)
+{
+    size_t i;
+
+    if (name[0] == 'e') {
+        for (i = 0; i < sizeof(reg_names_i32) / sizeof(char *); i++)
+            if (!strcmp(reg_names_i32[i], name))
+                return jit_reg_new(JIT_REG_KIND_I32, i);
+    }
+    else if (name[0] == 'r') {
+        for (i = 0; i < sizeof(reg_names_i64) / sizeof(char *); i++)
+            if (!strcmp(reg_names_i64[i], name))
+                return jit_reg_new(JIT_REG_KIND_I64, i);
+    }
+    else if (!strncmp(name, "xmm", 3)) {
+        if (!strstr(name, "_f64")) {
+            for (i = 0; i < sizeof(reg_names_f32) / sizeof(char *); i++)
+                if (!strcmp(reg_names_f32[i], name))
+                    return jit_reg_new(JIT_REG_KIND_F32, i);
+        }
+        else {
+            for (i = 0; i < sizeof(reg_names_f64) / sizeof(char *); i++)
+                if (!strcmp(reg_names_f64[i], name))
+                    return jit_reg_new(JIT_REG_KIND_F64, i);
+        }
+    }
+    return 0;
+}
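+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   JitReg fp  = jit_codegen_get_hreg_by_name("rbp");      // I64, no. 0
+ *   JitReg cmp = jit_codegen_get_hreg_by_name("esi");      // I32, no. 6
+ *   JitReg f0  = jit_codegen_get_hreg_by_name("xmm0_f64"); // F64, no. 0
+ *
+ * An unknown name returns 0.
+ */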

+ 345 - 0
core/iwasm/fast-jit/fe/jit_emit_compare.c

@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_compare.h"
+#include "jit_emit_function.h"
+#include "../jit_frontend.h"
+#include "../jit_codegen.h"
+
+static bool
+jit_compile_op_compare_integer(JitCompContext *cc, IntCond cond, bool is64Bit)
+{
+    JitReg lhs, rhs, res, const_zero, const_one;
+
+    if (cond < INT_EQZ || cond > INT_GE_U) {
+        jit_set_last_error(cc, "unsupported comparation operation");
+        goto fail;
+    }
+
+    res = jit_cc_new_reg_I32(cc);
+    const_zero = NEW_CONST(I32, 0);
+    const_one = NEW_CONST(I32, 1);
+
+    if (is64Bit) {
+        if (INT_EQZ == cond) {
+            rhs = NEW_CONST(I64, 0);
+        }
+        else {
+            POP_I64(rhs);
+        }
+        POP_I64(lhs);
+    }
+    else {
+        if (INT_EQZ == cond) {
+            rhs = NEW_CONST(I32, 0);
+        }
+        else {
+            POP_I32(rhs);
+        }
+        POP_I32(lhs);
+    }
+
+    GEN_INSN(CMP, cc->cmp_reg, lhs, rhs);
+    switch (cond) {
+        case INT_EQ:
+        case INT_EQZ:
+        {
+            GEN_INSN(SELECTEQ, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+        case INT_NE:
+        {
+            GEN_INSN(SELECTNE, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+        case INT_LT_S:
+        {
+            GEN_INSN(SELECTLTS, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+        case INT_LT_U:
+        {
+            GEN_INSN(SELECTLTU, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+        case INT_GT_S:
+        {
+            GEN_INSN(SELECTGTS, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+        case INT_GT_U:
+        {
+            GEN_INSN(SELECTGTU, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+        case INT_LE_S:
+        {
+            GEN_INSN(SELECTLES, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+        case INT_LE_U:
+        {
+            GEN_INSN(SELECTLEU, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+        case INT_GE_S:
+        {
+            GEN_INSN(SELECTGES, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+        default: /* INT_GE_U */
+        {
+            GEN_INSN(SELECTGEU, res, cc->cmp_reg, const_one, const_zero);
+            break;
+        }
+    }
+
+    PUSH_I32(res);
+    return true;
+fail:
+    return false;
+}
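+
+/*
+ * For example, a Wasm i32.lt_s comparison lowers to the IR pair
+ * (illustrative dump):
+ *
+ *   CMP cmp_reg, lhs, rhs
+ *   SELECTLTS res, cmp_reg, 1, 0
+ *
+ * i.e. res = (lhs < rhs, signed) ? 1 : 0, with the condition carried in
+ * the dedicated cmp register.
+ */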
+
+bool
+jit_compile_op_i32_compare(JitCompContext *cc, IntCond cond)
+{
+    return jit_compile_op_compare_integer(cc, cond, false);
+}
+
+bool
+jit_compile_op_i64_compare(JitCompContext *cc, IntCond cond)
+{
+    return jit_compile_op_compare_integer(cc, cond, true);
+}
+
+static int32
+float_cmp_eq(float f1, float f2)
+{
+    if (isnan(f1) || isnan(f2))
+        return 0;
+
+    return f1 == f2;
+}
+
+static int32
+float_cmp_ne(float f1, float f2)
+{
+    if (isnan(f1) || isnan(f2))
+        return 1;
+
+    return f1 != f2;
+}
+
+static int32
+double_cmp_eq(double d1, double d2)
+{
+    if (isnan(d1) || isnan(d2))
+        return 0;
+
+    return d1 == d2;
+}
+
+static int32
+double_cmp_ne(double d1, double d2)
+{
+    if (isnan(d1) || isnan(d2))
+        return 1;
+
+    return d1 != d2;
+}
+
+static bool
+jit_compile_op_compare_float_point(JitCompContext *cc, FloatCond cond,
+                                   JitReg lhs, JitReg rhs)
+{
+    JitReg res, args[2], const_zero, const_one;
+    JitRegKind kind;
+    void *func;
+
+    if (cond == FLOAT_EQ || cond == FLOAT_NE) {
+        kind = jit_reg_kind(lhs);
+        if (cond == FLOAT_EQ)
+            func = (kind == JIT_REG_KIND_F32) ? (void *)float_cmp_eq
+                                              : (void *)double_cmp_eq;
+        else
+            func = (kind == JIT_REG_KIND_F32) ? (void *)float_cmp_ne
+                                              : (void *)double_cmp_ne;
+
+        res = jit_cc_new_reg_I32(cc);
+        args[0] = lhs;
+        args[1] = rhs;
+
+        if (!jit_emit_callnative(cc, func, res, args, 2)) {
+            goto fail;
+        }
+    }
+    else {
+        res = jit_cc_new_reg_I32(cc);
+        const_zero = NEW_CONST(I32, 0);
+        const_one = NEW_CONST(I32, 1);
+        switch (cond) {
+            case FLOAT_LT:
+            {
+                GEN_INSN(CMP, cc->cmp_reg, rhs, lhs);
+                GEN_INSN(SELECTGTS, res, cc->cmp_reg, const_one, const_zero);
+                break;
+            }
+            case FLOAT_GT:
+            {
+                GEN_INSN(CMP, cc->cmp_reg, lhs, rhs);
+                GEN_INSN(SELECTGTS, res, cc->cmp_reg, const_one, const_zero);
+                break;
+            }
+            case FLOAT_LE:
+            {
+                GEN_INSN(CMP, cc->cmp_reg, rhs, lhs);
+                GEN_INSN(SELECTGES, res, cc->cmp_reg, const_one, const_zero);
+                break;
+            }
+            case FLOAT_GE:
+            {
+                GEN_INSN(CMP, cc->cmp_reg, lhs, rhs);
+                GEN_INSN(SELECTGES, res, cc->cmp_reg, const_one, const_zero);
+                break;
+            }
+            default:
+            {
+                bh_assert(!"unknown FloatCond");
+                goto fail;
+            }
+        }
+    }
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
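+
+/*
+ * Note that FLOAT_LT/FLOAT_LE swap the operands and reuse the signed
+ * "greater" selects, so lhs < rhs is emitted as (illustrative):
+ *
+ *   CMP cmp_reg, rhs, lhs
+ *   SELECTGTS res, cmp_reg, 1, 0
+ *
+ * FLOAT_EQ/FLOAT_NE instead call the native helpers above so that NaN
+ * operands follow Wasm semantics (NaN == x is false, NaN != x is true).
+ */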
+
+bool
+jit_compile_op_f32_compare(JitCompContext *cc, FloatCond cond)
+{
+    JitReg res, const_zero, const_one;
+    JitReg lhs, rhs;
+
+    POP_F32(rhs);
+    POP_F32(lhs);
+
+    if (jit_reg_is_const_val(lhs) && jit_reg_is_const_val(rhs)) {
+        float32 lvalue = jit_cc_get_const_F32(cc, lhs);
+        float32 rvalue = jit_cc_get_const_F32(cc, rhs);
+
+        const_zero = NEW_CONST(I32, 0);
+        const_one = NEW_CONST(I32, 1);
+
+        switch (cond) {
+            case FLOAT_EQ:
+            {
+                res = (lvalue == rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_NE:
+            {
+                res = (lvalue != rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_LT:
+            {
+                res = (lvalue < rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_GT:
+            {
+                res = (lvalue > rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_LE:
+            {
+                res = (lvalue <= rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_GE:
+            {
+                res = (lvalue >= rvalue) ? const_one : const_zero;
+                break;
+            }
+            default:
+            {
+                bh_assert(!"unknown FloatCond");
+                goto fail;
+            }
+        }
+
+        PUSH_I32(res);
+        return true;
+    }
+
+    return jit_compile_op_compare_float_point(cc, cond, lhs, rhs);
+fail:
+    return false;
+}
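+
+/*
+ * When both operands are compile-time constants the comparison is folded
+ * away, e.g. (illustrative):
+ *
+ *   f32.const 1.0 ; f32.const 2.0 ; f32.lt
+ *
+ * pushes NEW_CONST(I32, 1) directly and emits no compare instructions.
+ */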
+
+bool
+jit_compile_op_f64_compare(JitCompContext *cc, FloatCond cond)
+{
+    JitReg res, const_zero, const_one;
+    JitReg lhs, rhs;
+
+    POP_F64(rhs);
+    POP_F64(lhs);
+
+    if (jit_reg_is_const_val(lhs) && jit_reg_is_const_val(rhs)) {
+        float64 lvalue = jit_cc_get_const_F64(cc, lhs);
+        float64 rvalue = jit_cc_get_const_F64(cc, rhs);
+
+        const_zero = NEW_CONST(I32, 0);
+        const_one = NEW_CONST(I32, 1);
+
+        switch (cond) {
+            case FLOAT_EQ:
+            {
+                res = (lvalue == rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_NE:
+            {
+                res = (lvalue != rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_LT:
+            {
+                res = (lvalue < rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_GT:
+            {
+                res = (lvalue > rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_LE:
+            {
+                res = (lvalue <= rvalue) ? const_one : const_zero;
+                break;
+            }
+            case FLOAT_GE:
+            {
+                res = (lvalue >= rvalue) ? const_one : const_zero;
+                break;
+            }
+            default:
+            {
+                bh_assert(!"unknown FloatCond");
+                goto fail;
+            }
+        }
+
+        PUSH_I32(res);
+        return true;
+    }
+
+    return jit_compile_op_compare_float_point(cc, cond, lhs, rhs);
+fail:
+    return false;
+}

+ 32 - 0
core/iwasm/fast-jit/fe/jit_emit_compare.h

@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_COMPARE_H_
+#define _JIT_EMIT_COMPARE_H_
+
+#include "../jit_compiler.h"
+#include "../jit_frontend.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_compile_op_i32_compare(JitCompContext *cc, IntCond cond);
+
+bool
+jit_compile_op_i64_compare(JitCompContext *cc, IntCond cond);
+
+bool
+jit_compile_op_f32_compare(JitCompContext *cc, FloatCond cond);
+
+bool
+jit_compile_op_f64_compare(JitCompContext *cc, FloatCond cond);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_COMPARE_H_ */

+ 47 - 0
core/iwasm/fast-jit/fe/jit_emit_const.c

@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_const.h"
+#include "../jit_frontend.h"
+
+bool
+jit_compile_op_i32_const(JitCompContext *cc, int32 i32_const)
+{
+    JitReg value = NEW_CONST(I32, i32_const);
+    PUSH_I32(value);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_const(JitCompContext *cc, int64 i64_const)
+{
+    JitReg value = NEW_CONST(I64, i64_const);
+    PUSH_I64(value);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f32_const(JitCompContext *cc, float32 f32_const)
+{
+    JitReg value = NEW_CONST(F32, f32_const);
+    PUSH_F32(value);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f64_const(JitCompContext *cc, float64 f64_const)
+{
+    JitReg value = NEW_CONST(F64, f64_const);
+    PUSH_F64(value);
+    return true;
+fail:
+    return false;
+}
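+
+/*
+ * Compiling a constant emits no instructions: the value is interned in
+ * the compilation context's constant pool and its register is pushed
+ * onto the value stack, e.g. (illustrative) `i32.const 42` just pushes
+ * NEW_CONST(I32, 42), which later consumers may fold (see the compare
+ * folding in jit_emit_compare.c).
+ */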

+ 31 - 0
core/iwasm/fast-jit/fe/jit_emit_const.h

@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_CONST_H_
+#define _JIT_EMIT_CONST_H_
+
+#include "../jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_compile_op_i32_const(JitCompContext *cc, int32 i32_const);
+
+bool
+jit_compile_op_i64_const(JitCompContext *cc, int64 i64_const);
+
+bool
+jit_compile_op_f32_const(JitCompContext *cc, float32 f32_const);
+
+bool
+jit_compile_op_f64_const(JitCompContext *cc, float64 f64_const);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_CONST_H_ */

+ 1209 - 0
core/iwasm/fast-jit/fe/jit_emit_control.c

@@ -0,0 +1,1209 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_control.h"
+#include "jit_emit_exception.h"
+#include "../jit_frontend.h"
+#include "../interpreter/wasm_loader.h"
+
+#define CREATE_BASIC_BLOCK(new_basic_block)                       \
+    do {                                                          \
+        bh_assert(!new_basic_block);                              \
+        if (!(new_basic_block = jit_cc_new_basic_block(cc, 0))) { \
+            jit_set_last_error(cc, "create basic block failed");  \
+            goto fail;                                            \
+        }                                                         \
+    } while (0)
+
+#define CURR_BASIC_BLOCK() cc->cur_basic_block
+
+#define BUILD_BR(target_block)                                     \
+    do {                                                           \
+        if (!GEN_INSN(JMP, jit_basic_block_label(target_block))) { \
+            jit_set_last_error(cc, "generate jmp insn failed");    \
+            goto fail;                                             \
+        }                                                          \
+    } while (0)
+
+#define BUILD_COND_BR(value_if, block_then, block_else)                       \
+    do {                                                                      \
+        if (!GEN_INSN(CMP, cc->cmp_reg, value_if, NEW_CONST(I32, 0))          \
+            || !GEN_INSN(BNE, cc->cmp_reg, jit_basic_block_label(block_then), \
+                         jit_basic_block_label(block_else))) {                \
+            jit_set_last_error(cc, "generate bne insn failed");               \
+            goto fail;                                                        \
+        }                                                                     \
+    } while (0)
+
+#define SET_BUILDER_POS(basic_block)       \
+    do {                                   \
+        cc->cur_basic_block = basic_block; \
+    } while (0)
+
+#define SET_BB_BEGIN_BCIP(basic_block, bcip)                                   \
+    do {                                                                       \
+        *(jit_annl_begin_bcip(cc, jit_basic_block_label(basic_block))) = bcip; \
+    } while (0)
+
+#define SET_BB_END_BCIP(basic_block, bcip)                                   \
+    do {                                                                     \
+        *(jit_annl_end_bcip(cc, jit_basic_block_label(basic_block))) = bcip; \
+    } while (0)
+
+static JitBlock *
+get_target_block(JitCompContext *cc, uint32 br_depth)
+{
+    uint32 i = br_depth;
+    JitBlock *block = jit_block_stack_top(&cc->block_stack);
+
+    while (i-- > 0 && block) {
+        block = block->prev;
+    }
+
+    if (!block) {
+        jit_set_last_error(cc, "WASM block stack underflow");
+        return NULL;
+    }
+    return block;
+}
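+
+/*
+ * br_depth follows the Wasm convention: depth 0 targets the innermost
+ * enclosing block, depth 1 its parent, and so on. E.g. (illustrative)
+ * `br 1` inside a block nested in a loop targets the loop label.
+ */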
+
+static bool
+load_block_params(JitCompContext *cc, JitBlock *block)
+{
+    JitFrame *jit_frame = cc->jit_frame;
+    uint32 offset, i;
+    JitReg value = 0;
+
+    /* Clear jit frame's locals and stacks */
+    clear_values(jit_frame);
+
+    /* Restore jit frame's sp to block's sp begin */
+    jit_frame->sp = block->frame_sp_begin;
+
+    /* Load params to new block */
+    offset = (uint32)(jit_frame->sp - jit_frame->lp);
+    for (i = 0; i < block->param_count; i++) {
+        switch (block->param_types[i]) {
+            case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+            case VALUE_TYPE_EXTERNREF:
+            case VALUE_TYPE_FUNCREF:
+#endif
+                value = gen_load_i32(jit_frame, offset);
+                offset++;
+                break;
+            case VALUE_TYPE_I64:
+                value = gen_load_i64(jit_frame, offset);
+                offset += 2;
+                break;
+            case VALUE_TYPE_F32:
+                value = gen_load_f32(jit_frame, offset);
+                offset++;
+                break;
+            case VALUE_TYPE_F64:
+                value = gen_load_f64(jit_frame, offset);
+                offset += 2;
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+        PUSH(value, block->param_types[i]);
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+static bool
+load_block_results(JitCompContext *cc, JitBlock *block)
+{
+    JitFrame *jit_frame = cc->jit_frame;
+    uint32 offset, i;
+    JitReg value = 0;
+
+    /* Restore jit frame's sp to block's sp begin */
+    jit_frame->sp = block->frame_sp_begin;
+
+    /* Load results to new block */
+    offset = (uint32)(jit_frame->sp - jit_frame->lp);
+    for (i = 0; i < block->result_count; i++) {
+        switch (block->result_types[i]) {
+            case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+            case VALUE_TYPE_EXTERNREF:
+            case VALUE_TYPE_FUNCREF:
+#endif
+                value = gen_load_i32(jit_frame, offset);
+                offset++;
+                break;
+            case VALUE_TYPE_I64:
+                value = gen_load_i64(jit_frame, offset);
+                offset += 2;
+                break;
+            case VALUE_TYPE_F32:
+                value = gen_load_f32(jit_frame, offset);
+                offset++;
+                break;
+            case VALUE_TYPE_F64:
+                value = gen_load_f64(jit_frame, offset);
+                offset += 2;
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+        PUSH(value, block->result_types[i]);
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+static bool
+jit_reg_is_i32_const(JitCompContext *cc, JitReg reg, int32 val)
+{
+    return (jit_reg_kind(reg) == JIT_REG_KIND_I32 && jit_reg_is_const(reg)
+            && jit_cc_get_const_I32(cc, reg) == val)
+               ? true
+               : false;
+}
+
+/**
+ * get the last two insns:
+ *     CMP cmp_reg, r0, r1
+ *     SELECTcc r2, cmp_reg, 1, 0
+ */
+static void
+get_last_cmp_and_selectcc(JitCompContext *cc, JitReg cond, JitInsn **p_insn_cmp,
+                          JitInsn **p_insn_select)
+{
+    JitInsn *insn = jit_basic_block_last_insn(cc->cur_basic_block);
+
+    if (insn && insn->prev && insn->prev->opcode == JIT_OP_CMP
+        && insn->opcode >= JIT_OP_SELECTEQ && insn->opcode <= JIT_OP_SELECTLEU
+        && *jit_insn_opnd(insn, 0) == cond
+        && jit_reg_is_i32_const(cc, *jit_insn_opnd(insn, 2), 1)
+        && jit_reg_is_i32_const(cc, *jit_insn_opnd(insn, 3), 0)) {
+        *p_insn_cmp = insn->prev;
+        *p_insn_select = insn;
+    }
+}
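+
+/*
+ * This enables the compare/branch fusion applied below: when the `CMP +
+ * SELECTcc` pair produced by a comparison feeds an `if`, the SELECTcc is
+ * deleted and the conditional branch keys directly off cmp_reg, e.g.
+ * (illustrative) `i32.lt_s` followed by `if` lowers to:
+ *
+ *   CMP cmp_reg, lhs, rhs
+ *   BLTS cmp_reg, <then label>, <else label, patched lazily>
+ */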
+
+static bool
+push_jit_block_to_stack_and_pass_params(JitCompContext *cc, JitBlock *block,
+                                        JitBasicBlock *basic_block, JitReg cond,
+                                        bool merge_cmp_and_if)
+{
+    JitFrame *jit_frame = cc->jit_frame;
+    JitValue *value_list_head = NULL, *value_list_end = NULL, *jit_value;
+    JitInsn *insn;
+    JitReg value;
+    uint32 i, param_index, cell_num;
+
+    if (cc->cur_basic_block == basic_block) {
+        /* Reuse the current basic block; no values need to be committed,
+           just move the param values from the current block's value stack
+           to the new block's value stack */
+        for (i = 0; i < block->param_count; i++) {
+            jit_value = jit_value_stack_pop(
+                &jit_block_stack_top(&cc->block_stack)->value_stack);
+            if (!value_list_head) {
+                value_list_head = value_list_end = jit_value;
+                jit_value->prev = jit_value->next = NULL;
+            }
+            else {
+                jit_value->prev = NULL;
+                jit_value->next = value_list_head;
+                value_list_head->prev = jit_value;
+                value_list_head = jit_value;
+            }
+        }
+        block->value_stack.value_list_head = value_list_head;
+        block->value_stack.value_list_end = value_list_end;
+
+        /* Save block's begin frame sp */
+        cell_num = wasm_get_cell_num(block->param_types, block->param_count);
+        block->frame_sp_begin = jit_frame->sp - cell_num;
+
+        /* Push the new block to block stack */
+        jit_block_stack_push(&cc->block_stack, block);
+
+        /* Continue to translate current block */
+    }
+    else {
+        JitInsn *insn_select = NULL, *insn_cmp = NULL;
+
+        if (merge_cmp_and_if) {
+            get_last_cmp_and_selectcc(cc, cond, &insn_cmp, &insn_select);
+        }
+
+        /* Commit register values to locals and stacks */
+        gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp);
+
+        /* Pop param values from current block's value stack */
+        for (i = 0; i < block->param_count; i++) {
+            param_index = block->param_count - 1 - i;
+            POP(value, block->param_types[param_index]);
+        }
+
+        /* Clear frame values */
+        clear_values(jit_frame);
+        /* Save block's begin frame sp */
+        block->frame_sp_begin = jit_frame->sp;
+
+        /* Push the new block to block stack */
+        jit_block_stack_push(&cc->block_stack, block);
+
+        if (block->label_type == LABEL_TYPE_LOOP) {
+            BUILD_BR(basic_block);
+        }
+        else {
+            /* IF block with condition br insn */
+            if (insn_select && insn_cmp) {
+                /* Change `CMP + SELECTcc` into `CMP + Bcc` */
+                if (!(insn = GEN_INSN(BEQ, cc->cmp_reg,
+                                      jit_basic_block_label(basic_block), 0))) {
+                    jit_set_last_error(cc, "generate cond br failed");
+                    goto fail;
+                }
+                insn->opcode =
+                    JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ);
+                jit_insn_unlink(insn_select);
+                jit_insn_delete(insn_select);
+            }
+            else {
+                if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))
+                    || !(insn =
+                             GEN_INSN(BNE, cc->cmp_reg,
+                                      jit_basic_block_label(basic_block), 0))) {
+                    jit_set_last_error(cc, "generate cond br failed");
+                    goto fail;
+                }
+            }
+
+            /* Don't create the else or end basic block now; just save the
+               incoming BNE insn and patch its else label when the basic
+               block is lazily created */
+            if (block->wasm_code_else) {
+                block->incoming_insn_for_else_bb = insn;
+            }
+            else {
+                if (!jit_block_add_incoming_insn(block, insn, 2)) {
+                    jit_set_last_error(cc, "add incoming insn failed");
+                    goto fail;
+                }
+            }
+        }
+
+        /* Start to translate the block */
+        SET_BUILDER_POS(basic_block);
+
+        /* Push the block parameters */
+        if (!load_block_params(cc, block)) {
+            goto fail;
+        }
+    }
+    return true;
+fail:
+    return false;
+}
+
+static void
+copy_block_arities(JitCompContext *cc, JitReg dst_frame_sp, uint8 *dst_types,
+                   uint32 dst_type_count, JitReg *p_first_res_reg)
+{
+    JitFrame *jit_frame;
+    uint32 offset_src, offset_dst, i;
+    JitReg value;
+
+    jit_frame = cc->jit_frame;
+    offset_src = (uint32)(jit_frame->sp - jit_frame->lp)
+                 - wasm_get_cell_num(dst_types, dst_type_count);
+    offset_dst = 0;
+
+    /* pop values from stack and store to dest frame */
+    for (i = 0; i < dst_type_count; i++) {
+        switch (dst_types[i]) {
+            case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+            case VALUE_TYPE_EXTERNREF:
+            case VALUE_TYPE_FUNCREF:
+#endif
+                value = gen_load_i32(jit_frame, offset_src);
+                if (i == 0 && p_first_res_reg)
+                    *p_first_res_reg = value;
+                else
+                    GEN_INSN(STI32, value, dst_frame_sp,
+                             NEW_CONST(I32, offset_dst * 4));
+                offset_src++;
+                offset_dst++;
+                break;
+            case VALUE_TYPE_I64:
+                value = gen_load_i64(jit_frame, offset_src);
+                if (i == 0 && p_first_res_reg)
+                    *p_first_res_reg = value;
+                else
+                    GEN_INSN(STI64, value, dst_frame_sp,
+                             NEW_CONST(I32, offset_dst * 4));
+                offset_src += 2;
+                offset_dst += 2;
+                break;
+            case VALUE_TYPE_F32:
+                value = gen_load_f32(jit_frame, offset_src);
+                if (i == 0 && p_first_res_reg)
+                    *p_first_res_reg = value;
+                else
+                    GEN_INSN(STF32, value, dst_frame_sp,
+                             NEW_CONST(I32, offset_dst * 4));
+                offset_src++;
+                offset_dst++;
+                break;
+            case VALUE_TYPE_F64:
+                value = gen_load_f64(jit_frame, offset_src);
+                if (i == 0 && p_first_res_reg)
+                    *p_first_res_reg = value;
+                else
+                    GEN_INSN(STF64, value, dst_frame_sp,
+                             NEW_CONST(I32, offset_dst * 4));
+                offset_src += 2;
+                offset_dst += 2;
+                break;
+            default:
+                bh_assert(0);
+                break;
+        }
+    }
+}
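+
+/*
+ * The first result is kept in a register (*p_first_res_reg) instead of
+ * being stored, so that handle_func_return() below can pass it to
+ * RETURNBC for the interpreter to pick up directly; the remaining
+ * results are stored into the destination frame at 4-byte cell offsets.
+ */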
+
+static void
+handle_func_return(JitCompContext *cc, JitBlock *block)
+{
+    JitReg prev_frame, prev_frame_sp;
+    JitReg ret_reg = 0;
+
+    prev_frame = jit_cc_new_reg_ptr(cc);
+    prev_frame_sp = jit_cc_new_reg_ptr(cc);
+
+    /* prev_frame = cur_frame->prev_frame */
+    GEN_INSN(LDPTR, prev_frame, cc->fp_reg,
+             NEW_CONST(I32, offsetof(WASMInterpFrame, prev_frame)));
+    GEN_INSN(LDPTR, prev_frame_sp, prev_frame,
+             NEW_CONST(I32, offsetof(WASMInterpFrame, sp)));
+
+    if (block->result_count) {
+        uint32 cell_num =
+            wasm_get_cell_num(block->result_types, block->result_count);
+
+        copy_block_arities(cc, prev_frame_sp, block->result_types,
+                           block->result_count, &ret_reg);
+        /* prev_frame->sp += cell_num */
+        GEN_INSN(ADD, prev_frame_sp, prev_frame_sp,
+                 NEW_CONST(PTR, cell_num * 4));
+        GEN_INSN(STPTR, prev_frame_sp, prev_frame,
+                 NEW_CONST(I32, offsetof(WASMInterpFrame, sp)));
+    }
+
+    /* Free stack space of the current frame:
+       exec_env->wasm_stack.s.top = cur_frame */
+    GEN_INSN(STPTR, cc->fp_reg, cc->exec_env_reg,
+             NEW_CONST(I32, offsetof(WASMExecEnv, wasm_stack.s.top)));
+    /* Set the prev_frame as the current frame:
+       exec_env->cur_frame = prev_frame */
+    GEN_INSN(STPTR, prev_frame, cc->exec_env_reg,
+             NEW_CONST(I32, offsetof(WASMExecEnv, cur_frame)));
+    /* fp_reg = prev_frame */
+    GEN_INSN(MOV, cc->fp_reg, prev_frame);
+    /* return 0 */
+    GEN_INSN(RETURNBC, NEW_CONST(I32, JIT_INTERP_ACTION_NORMAL), ret_reg, 0);
+}
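+
+/*
+ * Frame layout assumed above: one stack cell is 4 bytes, so
+ * prev_frame->sp advances by cell_num * 4 bytes; i32/f32 values occupy
+ * one cell and i64/f64 values two (hence the offset += 1 / offset += 2
+ * steps in the load helpers).
+ */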
+
+/**
+ * is_block_polymorphic: whether the current block's stack is in polymorphic
+ * state. If the opcode is one of unreachable/br/br_table/return, the stack
+ * is marked polymorphic until the block's 'end' opcode is processed.
+ */
+static bool
+handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic)
+{
+    JitFrame *jit_frame = cc->jit_frame;
+    JitBlock *block, *block_prev;
+    JitIncomingInsn *incoming_insn;
+    JitInsn *insn;
+
+    /* Check block stack */
+    if (!(block = jit_block_stack_top(&cc->block_stack))) {
+        jit_set_last_error(cc, "WASM block stack underflow");
+        return false;
+    }
+
+    if (!block->incoming_insns_for_end_bb) {
+        /* No other basic block jumps to this end, so there is no need
+           to create the end basic block; just continue translating the
+           following opcodes */
+        if (block->label_type == LABEL_TYPE_FUNCTION) {
+            handle_func_return(cc, block);
+            SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
+            clear_values(jit_frame);
+        }
+        else if (block->result_count > 0) {
+            JitValue *value_list_head = NULL, *value_list_end = NULL;
+            JitValue *jit_value;
+            uint32 i;
+
+            /* No need to change cc->jit_frame, just move result values
+               from current block's value stack to previous block's
+               value stack */
+            block_prev = block->prev;
+
+            for (i = 0; i < block->result_count; i++) {
+                jit_value = jit_value_stack_pop(&block->value_stack);
+                bh_assert(jit_value);
+                if (!value_list_head) {
+                    value_list_head = value_list_end = jit_value;
+                    jit_value->prev = jit_value->next = NULL;
+                }
+                else {
+                    jit_value->prev = NULL;
+                    jit_value->next = value_list_head;
+                    value_list_head->prev = jit_value;
+                    value_list_head = jit_value;
+                }
+            }
+
+            if (!block_prev->value_stack.value_list_head) {
+                block_prev->value_stack.value_list_head = value_list_head;
+                block_prev->value_stack.value_list_end = value_list_end;
+            }
+            else {
+                /* Link to the end of previous block's value stack */
+                block_prev->value_stack.value_list_end->next = value_list_head;
+                value_list_head->prev = block_prev->value_stack.value_list_end;
+                block_prev->value_stack.value_list_end = value_list_end;
+            }
+        }
+
+        /* Pop block and destroy the block */
+        block = jit_block_stack_pop(&cc->block_stack);
+        jit_block_destroy(block);
+        return true;
+    }
+    else {
+        /* Commit register values to locals and stacks */
+        gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp);
+        /* Clear frame values */
+        clear_values(jit_frame);
+
+        /* Create the end basic block */
+        CREATE_BASIC_BLOCK(block->basic_block_end);
+        SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
+        SET_BB_BEGIN_BCIP(block->basic_block_end, *p_frame_ip);
+        /* No need to create a 'JMP' insn if the block is in stack
+           polymorphic state, as a previous br/br_table opcode has already
+           created a 'JMP' insn to this end basic block */
+        if (!is_block_polymorphic) {
+            /* Jump to the end basic block */
+            BUILD_BR(block->basic_block_end);
+        }
+
+        /* Patch the INSNs which jump to this basic block */
+        incoming_insn = block->incoming_insns_for_end_bb;
+        while (incoming_insn) {
+            insn = incoming_insn->insn;
+
+            bh_assert(
+                insn->opcode == JIT_OP_JMP
+                || (insn->opcode >= JIT_OP_BEQ && insn->opcode <= JIT_OP_BLEU)
+                || insn->opcode == JIT_OP_LOOKUPSWITCH);
+
+            if (insn->opcode == JIT_OP_JMP
+                || (insn->opcode >= JIT_OP_BEQ
+                    && insn->opcode <= JIT_OP_BLEU)) {
+                *(jit_insn_opnd(insn, incoming_insn->opnd_idx)) =
+                    jit_basic_block_label(block->basic_block_end);
+            }
+            else {
+                /* Patch LOOKUPSWITCH INSN */
+                JitOpndLookupSwitch *opnd = jit_insn_opndls(insn);
+                if (incoming_insn->opnd_idx < opnd->match_pairs_num) {
+                    opnd->match_pairs[incoming_insn->opnd_idx].target =
+                        jit_basic_block_label(block->basic_block_end);
+                }
+                else {
+                    opnd->default_target =
+                        jit_basic_block_label(block->basic_block_end);
+                }
+            }
+
+            incoming_insn = incoming_insn->next;
+        }
+
+        SET_BUILDER_POS(block->basic_block_end);
+
+        /* Pop block and load block results */
+        block = jit_block_stack_pop(&cc->block_stack);
+
+        if (block->label_type == LABEL_TYPE_FUNCTION) {
+            handle_func_return(cc, block);
+            SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
+            clear_values(jit_frame);
+        }
+        else {
+            if (!load_block_results(cc, block)) {
+                jit_block_destroy(block);
+                goto fail;
+            }
+        }
+
+        jit_block_destroy(block);
+        return true;
+    }
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * is_block_polymorphic: whether the current block's stack is in polymorphic
+ * state. If the opcode is one of unreachable/br/br_table/return, the stack
+ * is marked polymorphic until the block's 'end' opcode is processed.
+ */
+static bool
+handle_op_else(JitCompContext *cc, uint8 **p_frame_ip,
+               bool is_block_polymorphic)
+{
+    JitBlock *block = jit_block_stack_top(&cc->block_stack);
+    JitFrame *jit_frame = cc->jit_frame;
+    JitInsn *insn;
+
+    /* Check block */
+    if (!block) {
+        jit_set_last_error(cc, "WASM block stack underflow");
+        return false;
+    }
+    if (block->label_type != LABEL_TYPE_IF) {
+        jit_set_last_error(cc, "Invalid WASM block type");
+        return false;
+    }
+
+    if (!block->incoming_insn_for_else_bb) {
+        /* The if branch is handled like OP_BLOCK (cond is const and != 0),
+           just skip the else branch and handle OP_END */
+        *p_frame_ip = block->wasm_code_end + 1;
+        return handle_op_end(cc, p_frame_ip, false);
+    }
+    else {
+        /* Has else branch and need to translate else branch */
+
+        /* Commit register values to locals and stacks */
+        gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp);
+        /* Clear frame values */
+        clear_values(jit_frame);
+
+        /* No need to create a 'JMP' insn if the block is in stack
+           polymorphic state, as a previous br/br_table opcode has already
+           created a 'JMP' insn to this end basic block */
+        if (!is_block_polymorphic) {
+            /* Jump to end basic block */
+            if (!(insn = GEN_INSN(JMP, 0))) {
+                jit_set_last_error(cc, "generate jmp insn failed");
+                return false;
+            }
+            if (!jit_block_add_incoming_insn(block, insn, 0)) {
+                jit_set_last_error(cc, "add incoming insn failed");
+                return false;
+            }
+        }
+
+        /* Clear value stack, restore param values and
+           start to translate the else branch. */
+        jit_value_stack_destroy(&block->value_stack);
+
+        /* create else basic block */
+        CREATE_BASIC_BLOCK(block->basic_block_else);
+        SET_BB_END_BCIP(block->basic_block_entry, *p_frame_ip - 1);
+        SET_BB_BEGIN_BCIP(block->basic_block_else, *p_frame_ip);
+
+        /* Patch the insn which conditionally jumps to the else basic block */
+        insn = block->incoming_insn_for_else_bb;
+        *(jit_insn_opnd(insn, 2)) =
+            jit_basic_block_label(block->basic_block_else);
+
+        SET_BUILDER_POS(block->basic_block_else);
+
+        /* Reload block parameters */
+        if (!load_block_params(cc, block)) {
+            return false;
+        }
+
+        return true;
+    }
+    return true;
+fail:
+    return false;
+}
+
+static bool
+handle_next_reachable_block(JitCompContext *cc, uint8 **p_frame_ip)
+{
+    JitBlock *block = jit_block_stack_top(&cc->block_stack);
+
+    bh_assert(block);
+
+    do {
+        if (block->label_type == LABEL_TYPE_IF
+            && block->incoming_insn_for_else_bb
+            && *p_frame_ip <= block->wasm_code_else) {
+            /* Else branch hasn't been translated,
+               start to translate the else branch */
+            *p_frame_ip = block->wasm_code_else + 1;
+            /* Restore jit frame's sp to block's sp begin */
+            cc->jit_frame->sp = block->frame_sp_begin;
+            return handle_op_else(cc, p_frame_ip, true);
+        }
+        else if (block->incoming_insns_for_end_bb) {
+            *p_frame_ip = block->wasm_code_end + 1;
+            /* Restore jit frame's sp to block's sp end  */
+            cc->jit_frame->sp =
+                block->frame_sp_begin
+                + wasm_get_cell_num(block->result_types, block->result_count);
+            return handle_op_end(cc, p_frame_ip, true);
+        }
+        else {
+            *p_frame_ip = block->wasm_code_end + 1;
+            jit_block_stack_pop(&cc->block_stack);
+            jit_block_destroy(block);
+            block = jit_block_stack_top(&cc->block_stack);
+        }
+    } while (block != NULL);
+
+    return true;
+}
+
+bool
+jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip,
+                     uint8 *frame_ip_end, uint32 label_type, uint32 param_count,
+                     uint8 *param_types, uint32 result_count,
+                     uint8 *result_types, bool merge_cmp_and_if)
+{
+    BlockAddr block_addr_cache[BLOCK_ADDR_CACHE_SIZE][BLOCK_ADDR_CONFLICT_SIZE];
+    JitBlock *block;
+    JitReg value;
+    uint8 *else_addr, *end_addr;
+
+    /* Check block stack */
+    if (!jit_block_stack_top(&cc->block_stack)) {
+        jit_set_last_error(cc, "WASM block stack underflow");
+        return false;
+    }
+
+    memset(block_addr_cache, 0, sizeof(block_addr_cache));
+
+    /* Get block info */
+    if (!(wasm_loader_find_block_addr(
+            NULL, (BlockAddr *)block_addr_cache, *p_frame_ip, frame_ip_end,
+            (uint8)label_type, &else_addr, &end_addr))) {
+        jit_set_last_error(cc, "find block end addr failed");
+        return false;
+    }
+
+    /* Allocate memory */
+    if (!(block = jit_calloc(sizeof(JitBlock)))) {
+        jit_set_last_error(cc, "allocate memory failed");
+        return false;
+    }
+
+    if (param_count && !(block->param_types = jit_calloc(param_count))) {
+        jit_set_last_error(cc, "allocate memory failed");
+        goto fail;
+    }
+    if (result_count && !(block->result_types = jit_calloc(result_count))) {
+        jit_set_last_error(cc, "allocate memory failed");
+        goto fail;
+    }
+
+    /* Initialize block data */
+    block->label_type = label_type;
+    block->param_count = param_count;
+    if (param_count) {
+        bh_memcpy_s(block->param_types, param_count, param_types, param_count);
+    }
+    block->result_count = result_count;
+    if (result_count) {
+        bh_memcpy_s(block->result_types, result_count, result_types,
+                    result_count);
+    }
+    block->wasm_code_else = else_addr;
+    block->wasm_code_end = end_addr;
+
+    if (label_type == LABEL_TYPE_BLOCK) {
+        /* Push the new jit block to block stack and continue to
+           translate current basic block */
+        if (!push_jit_block_to_stack_and_pass_params(
+                cc, block, cc->cur_basic_block, 0, false))
+            goto fail;
+    }
+    else if (label_type == LABEL_TYPE_LOOP) {
+        CREATE_BASIC_BLOCK(block->basic_block_entry);
+        SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
+        SET_BB_BEGIN_BCIP(block->basic_block_entry, *p_frame_ip);
+        /* Push the new jit block to block stack and continue to
+           translate the new basic block */
+        if (!push_jit_block_to_stack_and_pass_params(
+                cc, block, block->basic_block_entry, 0, false))
+            goto fail;
+    }
+    else if (label_type == LABEL_TYPE_IF) {
+        POP_I32(value);
+
+        if (!jit_reg_is_const_val(value)) {
+            /* Compare value is not constant, create condition br IR */
+
+            /* Create entry block */
+            CREATE_BASIC_BLOCK(block->basic_block_entry);
+            SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
+            SET_BB_BEGIN_BCIP(block->basic_block_entry, *p_frame_ip);
+
+            if (!push_jit_block_to_stack_and_pass_params(
+                    cc, block, block->basic_block_entry, value,
+                    merge_cmp_and_if))
+                goto fail;
+        }
+        else {
+            if (jit_cc_get_const_I32(cc, value) != 0) {
+                /* Compare value is not 0, condition is true: the else
+                   branch of the if block cannot be reached, so treat it
+                   the same as LABEL_TYPE_BLOCK and start to translate
+                   the if branch */
+                if (!push_jit_block_to_stack_and_pass_params(
+                        cc, block, cc->cur_basic_block, 0, false))
+                    goto fail;
+            }
+            else {
+                if (else_addr) {
+                    /* Compare value is 0, condition is false: the if
+                       branch of the if block cannot be reached, so treat
+                       it the same as LABEL_TYPE_BLOCK and start to
+                       translate the else branch */
+                    if (!push_jit_block_to_stack_and_pass_params(
+                            cc, block, cc->cur_basic_block, 0, false))
+                        goto fail;
+                    *p_frame_ip = else_addr + 1;
+                }
+                else {
+                    /* The whole if block cannot be reached, skip it */
+                    jit_block_destroy(block);
+                    *p_frame_ip = end_addr + 1;
+                }
+            }
+        }
+    }
+    else {
+        jit_set_last_error(cc, "Invalid block type");
+        goto fail;
+    }
+
+    return true;
+fail:
+    jit_block_destroy(block);
+    return false;
+}
+
+bool
+jit_compile_op_else(JitCompContext *cc, uint8 **p_frame_ip)
+{
+    return handle_op_else(cc, p_frame_ip, false);
+}
+
+bool
+jit_compile_op_end(JitCompContext *cc, uint8 **p_frame_ip)
+{
+    return handle_op_end(cc, p_frame_ip, false);
+}
+
+/* Check whether arities need to be copied when jumping from the current
+   block to the dest block */
+static bool
+check_copy_arities(const JitBlock *block_dst, JitFrame *jit_frame)
+{
+    JitValueSlot *frame_sp_src = NULL;
+
+    if (block_dst->label_type == LABEL_TYPE_LOOP) {
+        frame_sp_src =
+            jit_frame->sp
+            - wasm_get_cell_num(block_dst->param_types, block_dst->param_count);
+        /* There are parameters to copy and the src/dst addr are different */
+        return (block_dst->param_count > 0
+                && block_dst->frame_sp_begin != frame_sp_src)
+                   ? true
+                   : false;
+    }
+    else {
+        frame_sp_src = jit_frame->sp
+                       - wasm_get_cell_num(block_dst->result_types,
+                                           block_dst->result_count);
+        /* There are results to copy and the src/dst addr are different */
+        return (block_dst->result_count > 0
+                && block_dst->frame_sp_begin != frame_sp_src)
+                   ? true
+                   : false;
+    }
+}
+
+static bool
+handle_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip)
+{
+    JitFrame *jit_frame;
+    JitBlock *block_dst, *block;
+    JitReg frame_sp_dst;
+    JitInsn *insn;
+    bool copy_arities;
+    uint32 offset;
+
+    /* Check block stack */
+    if (!(block = jit_block_stack_top(&cc->block_stack))) {
+        jit_set_last_error(cc, "WASM block stack underflow");
+        return false;
+    }
+
+    if (!(block_dst = get_target_block(cc, br_depth))) {
+        return false;
+    }
+
+    jit_frame = cc->jit_frame;
+
+    /* Only copy parameters or results when their count > 0 and
+       the src/dst addresses are different */
+    copy_arities = check_copy_arities(block_dst, jit_frame);
+
+    if (copy_arities) {
+        frame_sp_dst = jit_cc_new_reg_ptr(cc);
+        offset = offsetof(WASMInterpFrame, lp)
+                 + (block_dst->frame_sp_begin - jit_frame->lp) * 4;
+        GEN_INSN(ADD, frame_sp_dst, cc->fp_reg, NEW_CONST(PTR, offset));
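+        /* frame_sp_dst is the runtime address of the dest block's first
+           parameter/result slot; each stack cell is 4 bytes, hence the
+           multiplication by 4 above */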
+
+        /* No need to commit results as they will be copied to dest block */
+        gen_commit_values(jit_frame, jit_frame->lp, block->frame_sp_begin);
+    }
+    else {
+        /* Commit all including results as they won't be copied */
+        gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp);
+    }
+
+    if (block_dst->label_type == LABEL_TYPE_LOOP) {
+        if (copy_arities) {
+            /* Dest block is Loop block, copy loop parameters */
+            copy_block_arities(cc, frame_sp_dst, block_dst->param_types,
+                               block_dst->param_count, NULL);
+        }
+
+        clear_values(jit_frame);
+
+        /* Jump to the begin basic block */
+        BUILD_BR(block_dst->basic_block_entry);
+        SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
+    }
+    else {
+        if (copy_arities) {
+            /* Dest block is Block/If/Function block, copy block results */
+            copy_block_arities(cc, frame_sp_dst, block_dst->result_types,
+                               block_dst->result_count, NULL);
+        }
+
+        clear_values(jit_frame);
+
+        /* Jump to the end basic block */
+        if (!(insn = GEN_INSN(JMP, 0))) {
+            jit_set_last_error(cc, "generate jmp insn failed");
+            goto fail;
+        }
+        if (!jit_block_add_incoming_insn(block_dst, insn, 0)) {
+            jit_set_last_error(cc, "add incoming insn failed");
+            goto fail;
+        }
+        SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip)
+{
+    return handle_op_br(cc, br_depth, p_frame_ip)
+           && handle_next_reachable_block(cc, p_frame_ip);
+}
+
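+/* jit_compile_op_br_if must emit the branch-taken path (which may copy
+   arities) in a side basic block without disturbing the register mapping
+   of the fall-through path, so the virtual frame is cloned before that
+   path is translated and restored afterwards */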
+static JitFrame *
+jit_frame_clone(const JitFrame *jit_frame)
+{
+    JitFrame *jit_frame_cloned;
+    uint32 max_locals = jit_frame->max_locals;
+    uint32 max_stacks = jit_frame->max_stacks;
+    uint32 total_size;
+
+    total_size = (uint32)(offsetof(JitFrame, lp)
+                          + sizeof(*jit_frame->lp) * (max_locals + max_stacks));
+
+    jit_frame_cloned = jit_calloc(total_size);
+    if (jit_frame_cloned) {
+        bh_memcpy_s(jit_frame_cloned, total_size, jit_frame, total_size);
+        jit_frame_cloned->sp =
+            jit_frame_cloned->lp + (jit_frame->sp - jit_frame->lp);
+    }
+
+    return jit_frame_cloned;
+}
+
+static void
+jit_frame_copy(JitFrame *jit_frame_dst, const JitFrame *jit_frame_src)
+{
+    uint32 max_locals = jit_frame_src->max_locals;
+    uint32 max_stacks = jit_frame_src->max_stacks;
+    uint32 total_size;
+
+    total_size =
+        (uint32)(offsetof(JitFrame, lp)
+                 + sizeof(*jit_frame_src->lp) * (max_locals + max_stacks));
+    bh_memcpy_s(jit_frame_dst, total_size, jit_frame_src, total_size);
+    jit_frame_dst->sp =
+        jit_frame_dst->lp + (jit_frame_src->sp - jit_frame_src->lp);
+}
+
+bool
+jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth,
+                     bool merge_cmp_and_br_if, uint8 **p_frame_ip)
+{
+    JitFrame *jit_frame, *jit_frame_cloned;
+    JitBlock *block_dst;
+    JitReg cond;
+    JitBasicBlock *cur_basic_block, *if_basic_block = NULL;
+    JitInsn *insn, *insn_select = NULL, *insn_cmp = NULL;
+    bool copy_arities;
+
+    if (!(block_dst = get_target_block(cc, br_depth))) {
+        return false;
+    }
+
+    /* Pop the condition of the br_if from the value stack */
+    POP_I32(cond);
+
+    if (merge_cmp_and_br_if) {
+        get_last_cmp_and_selectcc(cc, cond, &insn_cmp, &insn_select);
+    }
+
+    jit_frame = cc->jit_frame;
+    cur_basic_block = cc->cur_basic_block;
+    gen_commit_values(jit_frame, jit_frame->lp, jit_frame->sp);
+
+    if (!(insn_select && insn_cmp)) {
+        if (!GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0))) {
+            jit_set_last_error(cc, "generate cmp insn failed");
+            goto fail;
+        }
+    }
+
+    /* Only copy parameters or results when their count > 0 and
+       the src/dst addresses are different */
+    copy_arities = check_copy_arities(block_dst, jit_frame);
+
+    if (!copy_arities) {
+        if (block_dst->label_type == LABEL_TYPE_LOOP) {
+            if (!(insn = GEN_INSN(
+                      BNE, cc->cmp_reg,
+                      jit_basic_block_label(block_dst->basic_block_entry),
+                      0))) {
+                jit_set_last_error(cc, "generate bne insn failed");
+                goto fail;
+            }
+        }
+        else {
+            if (!(insn = GEN_INSN(BNE, cc->cmp_reg, 0, 0))) {
+                jit_set_last_error(cc, "generate bne insn failed");
+                goto fail;
+            }
+            if (!jit_block_add_incoming_insn(block_dst, insn, 1)) {
+                jit_set_last_error(cc, "add incoming insn failed");
+                goto fail;
+            }
+        }
+        if (insn_select && insn_cmp) {
+            /* Change `CMP + SELECTcc` into `CMP + Bcc` */
+            insn->opcode = JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ);
+            jit_insn_unlink(insn_select);
+            jit_insn_delete(insn_select);
+        }
+        return true;
+    }
+
+    CREATE_BASIC_BLOCK(if_basic_block);
+    if (!(insn = GEN_INSN(BNE, cc->cmp_reg,
+                          jit_basic_block_label(if_basic_block), 0))) {
+        jit_set_last_error(cc, "generate bne insn failed");
+        goto fail;
+    }
+    if (insn_select && insn_cmp) {
+        /* Change `CMP + SELECTcc` into `CMP + Bcc` */
+        insn->opcode = JIT_OP_BEQ + (insn_select->opcode - JIT_OP_SELECTEQ);
+        jit_insn_unlink(insn_select);
+        jit_insn_delete(insn_select);
+    }
+
+    SET_BUILDER_POS(if_basic_block);
+    SET_BB_BEGIN_BCIP(if_basic_block, *p_frame_ip - 1);
+
+    /* Clone the current jit frame into a new jit frame */
+    if (!(jit_frame_cloned = jit_frame_clone(jit_frame))) {
+        jit_set_last_error(cc, "allocate memory failed");
+        goto fail;
+    }
+
+    /* Clear current jit frame so that the registers
+       in the new basic block will be loaded again */
+    clear_values(jit_frame);
+    if (!handle_op_br(cc, br_depth, p_frame_ip)) {
+        jit_free(jit_frame_cloned);
+        goto fail;
+    }
+
+    /* Restore the jit frame so that the registers can
+       be used again in current basic block */
+    jit_frame_copy(jit_frame, jit_frame_cloned);
+    jit_free(jit_frame_cloned);
+
+    /* Continue processing opcodes after BR_IF */
+    SET_BUILDER_POS(cur_basic_block);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_br_table(JitCompContext *cc, uint32 *br_depths, uint32 br_count,
+                        uint8 **p_frame_ip)
+{
+    JitBasicBlock *cur_basic_block;
+    JitReg value;
+    JitInsn *insn;
+    uint32 i = 0;
+    JitOpndLookupSwitch *opnd = NULL;
+
+    cur_basic_block = cc->cur_basic_block;
+
+    POP_I32(value);
+
+    /* append LOOKUPSWITCH to current basic block */
+    gen_commit_values(cc->jit_frame, cc->jit_frame->lp, cc->jit_frame->sp);
+    /* Clear frame values */
+    clear_values(cc->jit_frame);
+    SET_BB_END_BCIP(cur_basic_block, *p_frame_ip - 1);
+
+    /* prepare basic blocks for br */
+    insn = GEN_INSN(LOOKUPSWITCH, value, br_count);
+    if (NULL == insn) {
+        jit_set_last_error(cc, "generate insn LOOKUPSWITCH failed");
+        goto fail;
+    }
+
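+    /* A LOOKUPSWITCH insn carries br_count (value, target) match pairs
+       plus a default target: the loop below fills a match pair for each
+       i < br_count and the default target when i == br_count */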
+    for (i = 0, opnd = jit_insn_opndls(insn); i < br_count + 1; i++) {
+        JitBasicBlock *basic_block = NULL;
+        JitBlock *block_dst;
+        bool copy_arities;
+
+        if (!(block_dst = get_target_block(cc, br_depths[i]))) {
+            goto fail;
+        }
+
+        /* Only copy parameters or results when their count > 0 and
+           the src/dst addresses are different */
+        copy_arities = check_copy_arities(block_dst, cc->jit_frame);
+
+        if (!copy_arities) {
+            /* No need to create a new basic block: jump directly to
+               the existing basic block when no arities need to be copied */
+            if (i == br_count) {
+                if (block_dst->label_type == LABEL_TYPE_LOOP) {
+                    opnd->default_target =
+                        jit_basic_block_label(block_dst->basic_block_entry);
+                }
+                else {
+                    bh_assert(!block_dst->basic_block_end);
+                    if (!jit_block_add_incoming_insn(block_dst, insn, i)) {
+                        jit_set_last_error(cc, "add incoming insn failed");
+                        goto fail;
+                    }
+                }
+            }
+            else {
+                opnd->match_pairs[i].value = i;
+                if (block_dst->label_type == LABEL_TYPE_LOOP) {
+                    opnd->match_pairs[i].target =
+                        jit_basic_block_label(block_dst->basic_block_entry);
+                }
+                else {
+                    bh_assert(!block_dst->basic_block_end);
+                    if (!jit_block_add_incoming_insn(block_dst, insn, i)) {
+                        jit_set_last_error(cc, "add incoming insn failed");
+                        goto fail;
+                    }
+                }
+            }
+            continue;
+        }
+
+        /* Create a new basic block when arities need to be copied */
+        CREATE_BASIC_BLOCK(basic_block);
+        SET_BB_BEGIN_BCIP(basic_block, *p_frame_ip - 1);
+
+        if (i == br_count) {
+            opnd->default_target = jit_basic_block_label(basic_block);
+        }
+        else {
+            opnd->match_pairs[i].value = i;
+            opnd->match_pairs[i].target = jit_basic_block_label(basic_block);
+        }
+
+        SET_BUILDER_POS(basic_block);
+
+        if (!handle_op_br(cc, br_depths[i], p_frame_ip))
+            goto fail;
+    }
+
+    /* Search next available block to handle */
+    return handle_next_reachable_block(cc, p_frame_ip);
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_return(JitCompContext *cc, uint8 **p_frame_ip)
+{
+    JitBlock *block_func = cc->block_stack.block_list_head;
+
+    bh_assert(block_func);
+
+    handle_func_return(cc, block_func);
+    SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
+    clear_values(cc->jit_frame);
+
+    return handle_next_reachable_block(cc, p_frame_ip);
+}
+
+bool
+jit_compile_op_unreachable(JitCompContext *cc, uint8 **p_frame_ip)
+{
+    if (!jit_emit_exception(cc, JIT_EXCE_UNREACHABLE, JIT_OP_JMP, 0, NULL))
+        return false;
+
+    return handle_next_reachable_block(cc, p_frame_ip);
+}
+
+bool
+jit_handle_next_reachable_block(JitCompContext *cc, uint8 **p_frame_ip)
+{
+    return handle_next_reachable_block(cc, p_frame_ip);
+}

+ 56 - 0
core/iwasm/fast-jit/fe/jit_emit_control.h

@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_CONTROL_H_
+#define _JIT_EMIT_CONTROL_H_
+
+#include "../jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_compile_op_block(JitCompContext *cc, uint8 **p_frame_ip,
+                     uint8 *frame_ip_end, uint32 label_type, uint32 param_count,
+                     uint8 *param_types, uint32 result_count,
+                     uint8 *result_types, bool merge_cmp_and_if);
+
+bool
+jit_compile_op_else(JitCompContext *cc, uint8 **p_frame_ip);
+
+bool
+jit_compile_op_end(JitCompContext *cc, uint8 **p_frame_ip);
+
+bool
+jit_compile_op_br(JitCompContext *cc, uint32 br_depth, uint8 **p_frame_ip);
+
+bool
+jit_compile_op_br_if(JitCompContext *cc, uint32 br_depth,
+                     bool merge_cmp_and_br_if, uint8 **p_frame_ip);
+
+bool
+jit_compile_op_br_table(JitCompContext *cc, uint32 *br_depths, uint32 br_count,
+                        uint8 **p_frame_ip);
+
+bool
+jit_compile_op_return(JitCompContext *cc, uint8 **p_frame_ip);
+
+bool
+jit_compile_op_unreachable(JitCompContext *cc, uint8 **p_frame_ip);
+
+bool
+jit_handle_next_reachable_block(JitCompContext *cc, uint8 **p_frame_ip);
+
+#if WASM_ENABLE_THREAD_MGR != 0
+bool
+jit_check_suspend_flags(JitCompContext *cc);
+#endif
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_CONTROL_H_ */

+ 660 - 0
core/iwasm/fast-jit/fe/jit_emit_conversion.c

@@ -0,0 +1,660 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_conversion.h"
+#include "jit_emit_exception.h"
+#include "jit_emit_function.h"
+#include "../jit_codegen.h"
+#include "../jit_frontend.h"
+
+#define F32_I32_S_MIN (-2147483904.0f)
+#define F32_I32_S_MAX (2147483648.0f)
+#define F32_I32_U_MIN (-1.0f)
+#define F32_I32_U_MAX (4294967296.0f)
+#define F32_I64_S_MIN (-9223373136366403584.0f)
+#define F32_I64_S_MAX (9223372036854775808.0f)
+#define F32_I64_U_MIN (-1.0f)
+#define F32_I64_U_MAX (18446744073709551616.0f)
+
+#define F64_I32_S_MIN (-2147483649.0)
+#define F64_I32_S_MAX (2147483648.0)
+#define F64_I32_U_MIN (-1.0)
+#define F64_I32_U_MAX (4294967296.0)
+#define F64_I64_S_MIN (-9223372036854777856.0)
+#define F64_I64_S_MAX (9223372036854775808.0)
+#define F64_I64_U_MIN (-1.0)
+#define F64_I64_U_MAX (18446744073709551616.0)
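+
+/* A floating-point value fp converts to the target integer type without
+   trapping iff MIN < fp < MAX. The double bounds are exact (e.g.
+   F64_I32_S_MIN is INT32_MIN - 1); for the float bounds the nearest
+   representable values are used, e.g. -2147483904.0f is the largest
+   float strictly below INT32_MIN, so every float inside the open range
+   truncates to a representable i32 */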
+
+#define FP_TO_INT(f_ty, i_ty, f_nm, i_nm) \
+    static i_ty i_nm##_trunc_##f_nm(f_ty fp)
+
+#define INT_TO_FP(i_ty, f_ty, i_nm, f_nm) \
+    static f_ty f_nm##_convert_##i_nm(i_ty i)
+
+#define FP_TO_INT_SAT(f_ty, i_ty, f_nm, i_nm) \
+    static i_ty i_nm##_trunc_##f_nm##_sat(f_ty fp)
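+
+/* e.g. FP_TO_INT_SAT(float, int32, f32, i32) expands to the definition
+   header `static int32 i32_trunc_f32_sat(float fp)` */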
+
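+/* isnan() is typically a macro, so wrap it in real functions whose
+   addresses can be passed to jit_emit_callnative() below */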
+static int
+local_isnan(double x)
+{
+    return isnan(x);
+}
+
+static int
+local_isnanf(float x)
+{
+    return isnan(x);
+}
+
+#define RETURN_IF_NANF(fp)  \
+    if (local_isnanf(fp)) { \
+        return 0;           \
+    }
+
+#define RETURN_IF_NAN(fp)  \
+    if (local_isnan(fp)) { \
+        return 0;          \
+    }
+
+#define RETURN_IF_INF(fp, i_min, i_max) \
+    if (isinf(fp)) {                    \
+        return fp < 0 ? i_min : i_max;  \
+    }
+
+#define RETURN_IF_MIN(fp, f_min, i_min) \
+    if (fp <= f_min) {                  \
+        return i_min;                   \
+    }
+
+#define RETURN_IF_MAX(fp, f_max, i_max) \
+    if (fp >= f_max) {                  \
+        return i_max;                   \
+    }
+
+FP_TO_INT_SAT(float, int32, f32, i32)
+{
+    RETURN_IF_NANF(fp)
+    RETURN_IF_INF(fp, INT32_MIN, INT32_MAX)
+    RETURN_IF_MIN(fp, F32_I32_S_MIN, INT32_MIN)
+    RETURN_IF_MAX(fp, F32_I32_S_MAX, INT32_MAX)
+    return (int32)fp;
+}
+
+FP_TO_INT_SAT(float, uint32, f32, u32)
+{
+    RETURN_IF_NANF(fp)
+    RETURN_IF_INF(fp, 0, UINT32_MAX)
+    RETURN_IF_MIN(fp, F32_I32_U_MIN, 0)
+    RETURN_IF_MAX(fp, F32_I32_U_MAX, UINT32_MAX)
+    return (uint32)fp;
+}
+
+FP_TO_INT_SAT(double, int32, f64, i32)
+{
+    RETURN_IF_NAN(fp)
+    RETURN_IF_INF(fp, INT32_MIN, INT32_MAX)
+    RETURN_IF_MIN(fp, F64_I32_S_MIN, INT32_MIN)
+    RETURN_IF_MAX(fp, F64_I32_S_MAX, INT32_MAX)
+    return (int32)fp;
+}
+
+FP_TO_INT_SAT(double, uint32, f64, u32)
+{
+    RETURN_IF_NAN(fp)
+    RETURN_IF_INF(fp, 0, UINT32_MAX)
+    RETURN_IF_MIN(fp, F64_I32_U_MIN, 0)
+    RETURN_IF_MAX(fp, F64_I32_U_MAX, UINT32_MAX)
+    return (uint32)fp;
+}
+
+FP_TO_INT_SAT(float, int64, f32, i64)
+{
+    RETURN_IF_NANF(fp)
+    RETURN_IF_INF(fp, INT64_MIN, INT64_MAX)
+    RETURN_IF_MIN(fp, F32_I64_S_MIN, INT64_MIN)
+    RETURN_IF_MAX(fp, F32_I64_S_MAX, INT64_MAX)
+    return (int64)fp;
+}
+
+FP_TO_INT(float, uint64, f32, u64)
+{
+    return (uint64)fp;
+}
+
+FP_TO_INT_SAT(float, uint64, f32, u64)
+{
+    RETURN_IF_NANF(fp)
+    RETURN_IF_INF(fp, 0, UINT64_MAX)
+    RETURN_IF_MIN(fp, F32_I64_U_MIN, 0)
+    RETURN_IF_MAX(fp, F32_I64_U_MAX, UINT64_MAX)
+    return (uint64)fp;
+}
+
+FP_TO_INT_SAT(double, int64, f64, i64)
+{
+    RETURN_IF_NAN(fp)
+    RETURN_IF_INF(fp, INT64_MIN, INT64_MAX)
+    RETURN_IF_MIN(fp, F64_I64_S_MIN, INT64_MIN)
+    RETURN_IF_MAX(fp, F64_I64_S_MAX, INT64_MAX)
+    return (int64)fp;
+}
+
+FP_TO_INT(double, uint64, f64, u64)
+{
+    return (uint64)fp;
+}
+
+FP_TO_INT_SAT(double, uint64, f64, u64)
+{
+    RETURN_IF_NAN(fp)
+    RETURN_IF_INF(fp, 0, UINT64_MAX)
+    RETURN_IF_MIN(fp, F64_I64_U_MIN, 0)
+    RETURN_IF_MAX(fp, F64_I64_U_MAX, UINT64_MAX)
+    return (uint64)fp;
+}
+
+INT_TO_FP(uint64, float, u64, f32)
+{
+    return (float)i;
+}
+
+INT_TO_FP(uint64, double, u64, f64)
+{
+    return (double)i;
+}
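+
+/* The unsigned 64-bit conversions above are routed through these native
+   helpers because the code generator does not provide direct opcodes for
+   them: x86-64 offers no single instruction for u64 <-> float conversion
+   before AVX-512 */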
+
+bool
+jit_compile_op_i32_wrap_i64(JitCompContext *cc)
+{
+    JitReg num, res;
+
+    POP_I64(num);
+
+    res = jit_cc_new_reg_I32(cc);
+    GEN_INSN(I64TOI32, res, num);
+
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+static bool
+jit_compile_check_value_range(JitCompContext *cc, JitReg value, JitReg min_fp,
+                              JitReg max_fp)
+{
+    JitReg nan_ret = jit_cc_new_reg_I32(cc);
+    JitRegKind kind = jit_reg_kind(value);
+    bool emit_ret = false;
+
+    bh_assert(JIT_REG_KIND_F32 == kind || JIT_REG_KIND_F64 == kind);
+
+    /* If value is NaN, throw exception */
+    if (JIT_REG_KIND_F32 == kind)
+        emit_ret = jit_emit_callnative(cc, local_isnanf, nan_ret, &value, 1);
+    else
+        emit_ret = jit_emit_callnative(cc, local_isnan, nan_ret, &value, 1);
+    if (!emit_ret)
+        goto fail;
+
+    GEN_INSN(CMP, cc->cmp_reg, nan_ret, NEW_CONST(I32, 1));
+    if (!jit_emit_exception(cc, JIT_EXCE_INVALID_CONVERSION_TO_INTEGER,
+                            JIT_OP_BEQ, cc->cmp_reg, NULL))
+        goto fail;
+
+    /* If value is out of integer range, throw exception */
+    GEN_INSN(CMP, cc->cmp_reg, min_fp, value);
+    if (!jit_emit_exception(cc, JIT_EXCE_INTEGER_OVERFLOW, JIT_OP_BGES,
+                            cc->cmp_reg, NULL))
+        goto fail;
+
+    GEN_INSN(CMP, cc->cmp_reg, value, max_fp);
+    if (!jit_emit_exception(cc, JIT_EXCE_INTEGER_OVERFLOW, JIT_OP_BGES,
+                            cc->cmp_reg, NULL))
+        goto fail;
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_trunc_f32(JitCompContext *cc, bool sign, bool sat)
+{
+    JitReg value, res;
+
+    POP_F32(value);
+
+    res = jit_cc_new_reg_I32(cc);
+    if (!sat) {
+        JitReg min_fp = NEW_CONST(F32, sign ? F32_I32_S_MIN : F32_I32_U_MIN);
+        JitReg max_fp = NEW_CONST(F32, sign ? F32_I32_S_MAX : F32_I32_U_MAX);
+
+        if (!jit_compile_check_value_range(cc, value, min_fp, max_fp))
+            goto fail;
+
+        if (sign)
+            GEN_INSN(F32TOI32, res, value);
+        else
+            GEN_INSN(F32TOU32, res, value);
+    }
+    else {
+        if (!jit_emit_callnative(cc,
+                                 sign ? (void *)i32_trunc_f32_sat
+                                      : (void *)u32_trunc_f32_sat,
+                                 res, &value, 1))
+            goto fail;
+    }
+
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_trunc_f64(JitCompContext *cc, bool sign, bool sat)
+{
+    JitReg value, res;
+
+    POP_F64(value);
+
+    res = jit_cc_new_reg_I32(cc);
+    if (!sat) {
+        JitReg min_fp = NEW_CONST(F64, sign ? F64_I32_S_MIN : F64_I32_U_MIN);
+        JitReg max_fp = NEW_CONST(F64, sign ? F64_I32_S_MAX : F64_I32_U_MAX);
+
+        if (!jit_compile_check_value_range(cc, value, min_fp, max_fp))
+            goto fail;
+
+        if (sign)
+            GEN_INSN(F64TOI32, res, value);
+        else
+            GEN_INSN(F64TOU32, res, value);
+    }
+    else {
+        if (!jit_emit_callnative(cc,
+                                 sign ? (void *)i32_trunc_f64_sat
+                                      : (void *)u32_trunc_f64_sat,
+                                 res, &value, 1))
+            goto fail;
+    }
+
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_extend_i32(JitCompContext *cc, bool sign)
+{
+    JitReg num, res;
+
+    POP_I32(num);
+
+    res = jit_cc_new_reg_I64(cc);
+    if (sign)
+        GEN_INSN(I32TOI64, res, num);
+    else
+        GEN_INSN(U32TOI64, res, num);
+
+    PUSH_I64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_extend_i64(JitCompContext *cc, int8 bitwidth)
+{
+    JitReg value, tmp, res;
+
+    POP_I64(value);
+
+    tmp = jit_cc_new_reg_I32(cc);
+    res = jit_cc_new_reg_I64(cc);
+
+    switch (bitwidth) {
+        case 8:
+        {
+            GEN_INSN(I64TOI8, tmp, value);
+            GEN_INSN(I8TOI64, res, tmp);
+            break;
+        }
+        case 16:
+        {
+            GEN_INSN(I64TOI16, tmp, value);
+            GEN_INSN(I16TOI64, res, tmp);
+            break;
+        }
+        case 32:
+        {
+            GEN_INSN(I64TOI32, tmp, value);
+            GEN_INSN(I32TOI64, res, tmp);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    PUSH_I64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_extend_i32(JitCompContext *cc, int8 bitwidth)
+{
+    JitReg value, tmp, res;
+
+    POP_I32(value);
+
+    tmp = jit_cc_new_reg_I32(cc);
+    res = jit_cc_new_reg_I32(cc);
+
+    switch (bitwidth) {
+        case 8:
+        {
+            GEN_INSN(I32TOI8, tmp, value);
+            GEN_INSN(I8TOI32, res, tmp);
+            break;
+        }
+        case 16:
+        {
+            GEN_INSN(I32TOI16, tmp, value);
+            GEN_INSN(I16TOI32, res, tmp);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_trunc_f32(JitCompContext *cc, bool sign, bool sat)
+{
+    JitReg value, res;
+
+    POP_F32(value);
+
+    res = jit_cc_new_reg_I64(cc);
+    if (!sat) {
+        JitReg min_fp = NEW_CONST(F32, sign ? F32_I64_S_MIN : F32_I64_U_MIN);
+        JitReg max_fp = NEW_CONST(F32, sign ? F32_I64_S_MAX : F32_I64_U_MAX);
+
+        if (!jit_compile_check_value_range(cc, value, min_fp, max_fp))
+            goto fail;
+
+        if (sign) {
+            GEN_INSN(F32TOI64, res, value);
+        }
+        else {
+            if (!jit_emit_callnative(cc, u64_trunc_f32, res, &value, 1))
+                goto fail;
+        }
+    }
+    else {
+        if (!jit_emit_callnative(cc,
+                                 sign ? (void *)i64_trunc_f32_sat
+                                      : (void *)u64_trunc_f32_sat,
+                                 res, &value, 1))
+            goto fail;
+    }
+
+    PUSH_I64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_trunc_f64(JitCompContext *cc, bool sign, bool sat)
+{
+    JitReg value, res;
+
+    POP_F64(value);
+
+    res = jit_cc_new_reg_I64(cc);
+    if (!sat) {
+        JitReg min_fp = NEW_CONST(F64, sign ? F64_I64_S_MIN : F64_I64_U_MIN);
+        JitReg max_fp = NEW_CONST(F64, sign ? F64_I64_S_MAX : F64_I64_U_MAX);
+
+        if (!jit_compile_check_value_range(cc, value, min_fp, max_fp))
+            goto fail;
+
+        if (sign) {
+            GEN_INSN(F64TOI64, res, value);
+        }
+        else {
+            if (!jit_emit_callnative(cc, u64_trunc_f64, res, &value, 1))
+                goto fail;
+        }
+    }
+    else {
+        if (!jit_emit_callnative(cc,
+                                 sign ? (void *)i64_trunc_f64_sat
+                                      : (void *)u64_trunc_f64_sat,
+                                 res, &value, 1))
+            goto fail;
+    }
+
+    PUSH_I64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f32_convert_i32(JitCompContext *cc, bool sign)
+{
+    JitReg value, res;
+
+    POP_I32(value);
+
+    res = jit_cc_new_reg_F32(cc);
+    if (sign) {
+        GEN_INSN(I32TOF32, res, value);
+    }
+    else {
+        GEN_INSN(U32TOF32, res, value);
+    }
+
+    PUSH_F32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f32_convert_i64(JitCompContext *cc, bool sign)
+{
+    JitReg value, res;
+
+    POP_I64(value);
+
+    res = jit_cc_new_reg_F32(cc);
+    if (sign) {
+        GEN_INSN(I64TOF32, res, value);
+    }
+    else {
+        if (!jit_emit_callnative(cc, f32_convert_u64, res, &value, 1)) {
+            goto fail;
+        }
+    }
+
+    PUSH_F32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f32_demote_f64(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_F64(value);
+
+    res = jit_cc_new_reg_F32(cc);
+    GEN_INSN(F64TOF32, res, value);
+
+    PUSH_F32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f64_convert_i32(JitCompContext *cc, bool sign)
+{
+    JitReg value, res;
+
+    POP_I32(value);
+
+    res = jit_cc_new_reg_F64(cc);
+    if (sign)
+        GEN_INSN(I32TOF64, res, value);
+    else
+        GEN_INSN(U32TOF64, res, value);
+
+    PUSH_F64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f64_convert_i64(JitCompContext *cc, bool sign)
+{
+    JitReg value, res;
+
+    POP_I64(value);
+
+    res = jit_cc_new_reg_F64(cc);
+    if (sign) {
+        GEN_INSN(I64TOF64, res, value);
+    }
+    else {
+        if (!jit_emit_callnative(cc, f64_convert_u64, res, &value, 1)) {
+            goto fail;
+        }
+    }
+
+    PUSH_F64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f64_promote_f32(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_F32(value);
+
+    res = jit_cc_new_reg_F64(cc);
+    GEN_INSN(F32TOF64, res, value);
+
+    PUSH_F64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_reinterpret_f64(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_F64(value);
+
+    res = jit_cc_new_reg_I64(cc);
+    GEN_INSN(F64CASTI64, res, value);
+
+    PUSH_I64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_reinterpret_f32(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_F32(value);
+
+    res = jit_cc_new_reg_I32(cc);
+    GEN_INSN(F32CASTI32, res, value);
+
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f64_reinterpret_i64(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_I64(value);
+
+    res = jit_cc_new_reg_F64(cc);
+    GEN_INSN(I64CASTF64, res, value);
+
+    PUSH_F64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f32_reinterpret_i32(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_I32(value);
+
+    res = jit_cc_new_reg_F32(cc);
+    GEN_INSN(I32CASTF32, res, value);
+
+    PUSH_F32(res);
+
+    return true;
+fail:
+    return false;
+}

+ 73 - 0
core/iwasm/fast-jit/fe/jit_emit_conversion.h

@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_CONVERSION_H_
+#define _JIT_EMIT_CONVERSION_H_
+
+#include "../jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_compile_op_i32_wrap_i64(JitCompContext *cc);
+
+bool
+jit_compile_op_i32_trunc_f32(JitCompContext *cc, bool sign, bool sat);
+
+bool
+jit_compile_op_i32_trunc_f64(JitCompContext *cc, bool sign, bool sat);
+
+bool
+jit_compile_op_i64_extend_i32(JitCompContext *comp_ctx, bool sign);
+
+bool
+jit_compile_op_i64_extend_i64(JitCompContext *comp_ctx, int8 bitwidth);
+
+bool
+jit_compile_op_i32_extend_i32(JitCompContext *comp_ctx, int8 bitwidth);
+
+bool
+jit_compile_op_i64_trunc_f32(JitCompContext *cc, bool sign, bool sat);
+
+bool
+jit_compile_op_i64_trunc_f64(JitCompContext *cc, bool sign, bool sat);
+
+bool
+jit_compile_op_f32_convert_i32(JitCompContext *comp_ctx, bool sign);
+
+bool
+jit_compile_op_f32_convert_i64(JitCompContext *comp_ctx, bool sign);
+
+bool
+jit_compile_op_f32_demote_f64(JitCompContext *comp_ctx);
+
+bool
+jit_compile_op_f64_convert_i32(JitCompContext *comp_ctx, bool sign);
+
+bool
+jit_compile_op_f64_convert_i64(JitCompContext *comp_ctx, bool sign);
+
+bool
+jit_compile_op_f64_promote_f32(JitCompContext *comp_ctx);
+
+bool
+jit_compile_op_i64_reinterpret_f64(JitCompContext *comp_ctx);
+
+bool
+jit_compile_op_i32_reinterpret_f32(JitCompContext *comp_ctx);
+
+bool
+jit_compile_op_f64_reinterpret_i64(JitCompContext *comp_ctx);
+
+bool
+jit_compile_op_f32_reinterpret_i32(JitCompContext *comp_ctx);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_CONVERSION_H_ */

+ 78 - 0
core/iwasm/fast-jit/fe/jit_emit_exception.c

@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_exception.h"
+#include "../jit_frontend.h"
+
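+/* Emit a (conditional) branch that raises the given exception: the
+   branch target is left as 0 here, and the insn is recorded in
+   cc->incoming_insns_for_exec_bbs[exception_id] so that it can be
+   patched once the shared exception-throwing basic blocks are created */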
+bool
+jit_emit_exception(JitCompContext *cc, int32 exception_id, uint8 jit_opcode,
+                   JitReg cond_br_if, JitBasicBlock *cond_br_else_block)
+{
+    JitInsn *insn = NULL;
+    JitIncomingInsn *incoming_insn;
+    JitReg else_label;
+
+    bh_assert(exception_id < JIT_EXCE_NUM);
+
+    if (jit_opcode >= JIT_OP_BEQ && jit_opcode <= JIT_OP_BLEU) {
+        bh_assert(cond_br_if == cc->cmp_reg);
+        else_label =
+            cond_br_else_block ? jit_basic_block_label(cond_br_else_block) : 0;
+        switch (jit_opcode) {
+            case JIT_OP_BEQ:
+                insn = GEN_INSN(BEQ, cond_br_if, 0, else_label);
+                break;
+            case JIT_OP_BNE:
+                insn = GEN_INSN(BNE, cond_br_if, 0, else_label);
+                break;
+            case JIT_OP_BGTS:
+                insn = GEN_INSN(BGTS, cond_br_if, 0, else_label);
+                break;
+            case JIT_OP_BGES:
+                insn = GEN_INSN(BGES, cond_br_if, 0, else_label);
+                break;
+            case JIT_OP_BLTS:
+                insn = GEN_INSN(BLTS, cond_br_if, 0, else_label);
+                break;
+            case JIT_OP_BLES:
+                insn = GEN_INSN(BLES, cond_br_if, 0, else_label);
+                break;
+            case JIT_OP_BGTU:
+                insn = GEN_INSN(BGTU, cond_br_if, 0, else_label);
+                break;
+            case JIT_OP_BGEU:
+                insn = GEN_INSN(BGEU, cond_br_if, 0, else_label);
+                break;
+            case JIT_OP_BLTU:
+                insn = GEN_INSN(BLTU, cond_br_if, 0, else_label);
+                break;
+            case JIT_OP_BLEU:
+                insn = GEN_INSN(BLEU, cond_br_if, 0, else_label);
+                break;
+        }
+        if (!insn) {
+            jit_set_last_error(cc, "generate cond br insn failed");
+            return false;
+        }
+    }
+    else if (jit_opcode == JIT_OP_JMP) {
+        insn = GEN_INSN(JMP, 0);
+        if (!insn) {
+            jit_set_last_error(cc, "generate jmp insn failed");
+            return false;
+        }
+    }
+
+    incoming_insn = jit_calloc(sizeof(JitIncomingInsn));
+    if (!incoming_insn) {
+        jit_set_last_error(cc, "allocate memory failed");
+        return false;
+    }
+
+    incoming_insn->insn = insn;
+    incoming_insn->next = cc->incoming_insns_for_exec_bbs[exception_id];
+    cc->incoming_insns_for_exec_bbs[exception_id] = incoming_insn;
+    return true;
+}

+ 23 - 0
core/iwasm/fast-jit/fe/jit_emit_exception.h

@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_EXCEPTION_H_
+#define _JIT_EMIT_EXCEPTION_H_
+
+#include "../jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_emit_exception(JitCompContext *cc, int32 exception_id, uint8 jit_opcode,
+                   JitReg cond_br_if, JitBasicBlock *cond_br_else_block);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_EXCEPTION_H_ */

+ 535 - 0
core/iwasm/fast-jit/fe/jit_emit_function.c

@@ -0,0 +1,535 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_function.h"
+#include "jit_emit_exception.h"
+#include "../jit_frontend.h"
+#include "../jit_codegen.h"
+#include "../../interpreter/wasm_runtime.h"
+
+extern bool
+jit_invoke_native(WASMExecEnv *exec_env, uint32 func_idx,
+                  WASMInterpFrame *prev_frame);
+
+/* Prepare parameters for the function to call */
+static bool
+pre_call(JitCompContext *cc, const WASMType *func_type)
+{
+    JitReg value;
+    uint32 i, outs_off;
+
+    /* Compute the offset just past the callee's parameter area in the
+       next frame */
+    outs_off =
+        cc->total_frame_size + offsetof(WASMInterpFrame, lp)
+        + wasm_get_cell_num(func_type->types, func_type->param_count) * 4;
+
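+    /* Parameters are popped from the value stack in reverse order and
+       stored into the next frame, decrementing outs_off by each value's
+       size */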
+    for (i = 0; i < func_type->param_count; i++) {
+        switch (func_type->types[func_type->param_count - 1 - i]) {
+            case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+            case VALUE_TYPE_EXTERNREF:
+            case VALUE_TYPE_FUNCREF:
+#endif
+                POP_I32(value);
+                outs_off -= 4;
+                GEN_INSN(STI32, value, cc->fp_reg, NEW_CONST(I32, outs_off));
+                break;
+            case VALUE_TYPE_I64:
+                POP_I64(value);
+                outs_off -= 8;
+                GEN_INSN(STI64, value, cc->fp_reg, NEW_CONST(I32, outs_off));
+                break;
+            case VALUE_TYPE_F32:
+                POP_F32(value);
+                outs_off -= 4;
+                GEN_INSN(STF32, value, cc->fp_reg, NEW_CONST(I32, outs_off));
+                break;
+            case VALUE_TYPE_F64:
+                POP_F64(value);
+                outs_off -= 8;
+                GEN_INSN(STF64, value, cc->fp_reg, NEW_CONST(I32, outs_off));
+                break;
+            default:
+                bh_assert(0);
+                goto fail;
+        }
+    }
+
+    /* Commit sp as the callee may use it to store the results */
+    gen_commit_sp_ip(cc->jit_frame);
+
+    return true;
+fail:
+    return false;
+}
+
+/* Push results */
+static bool
+post_return(JitCompContext *cc, const WASMType *func_type, JitReg first_res)
+{
+    uint32 i, n;
+    JitReg value;
+
+    n = cc->jit_frame->sp - cc->jit_frame->lp;
+    for (i = 0; i < func_type->result_count; i++) {
+        switch (func_type->types[func_type->param_count + i]) {
+            case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+            case VALUE_TYPE_EXTERNREF:
+            case VALUE_TYPE_FUNCREF:
+#endif
+                if (i == 0 && first_res) {
+                    bh_assert(jit_reg_kind(first_res) == JIT_REG_KIND_I32);
+                    value = first_res;
+                }
+                else {
+                    value = jit_cc_new_reg_I32(cc);
+                    GEN_INSN(LDI32, value, cc->fp_reg,
+                             NEW_CONST(I32, offset_of_local(n)));
+                }
+                PUSH_I32(value);
+                n++;
+                break;
+            case VALUE_TYPE_I64:
+                if (i == 0 && first_res) {
+                    bh_assert(jit_reg_kind(first_res) == JIT_REG_KIND_I64);
+                    value = first_res;
+                }
+                else {
+                    value = jit_cc_new_reg_I64(cc);
+                    GEN_INSN(LDI64, value, cc->fp_reg,
+                             NEW_CONST(I32, offset_of_local(n)));
+                }
+                PUSH_I64(value);
+                n += 2;
+                break;
+            case VALUE_TYPE_F32:
+                if (i == 0 && first_res) {
+                    bh_assert(jit_reg_kind(first_res) == JIT_REG_KIND_F32);
+                    value = first_res;
+                }
+                else {
+                    value = jit_cc_new_reg_F32(cc);
+                    GEN_INSN(LDF32, value, cc->fp_reg,
+                             NEW_CONST(I32, offset_of_local(n)));
+                }
+                PUSH_F32(value);
+                n++;
+                break;
+            case VALUE_TYPE_F64:
+                if (i == 0 && first_res) {
+                    bh_assert(jit_reg_kind(first_res) == JIT_REG_KIND_F64);
+                    value = first_res;
+                }
+                else {
+                    value = jit_cc_new_reg_F64(cc);
+                    GEN_INSN(LDF64, value, cc->fp_reg,
+                             NEW_CONST(I32, offset_of_local(n)));
+                }
+                PUSH_F64(value);
+                n += 2;
+                break;
+            default:
+                bh_assert(0);
+                goto fail;
+        }
+    }
+
+    /* Update the committed_sp as the callee has updated the frame sp */
+    cc->jit_frame->committed_sp = cc->jit_frame->sp;
+
+    return true;
+fail:
+    return false;
+}
+
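+/* Translate a direct call: imported functions go through the native
+   helper jit_invoke_native(), while functions defined in the module are
+   invoked directly through their jitted code entry via a CALLBC insn */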
+bool
+jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call)
+{
+    WASMModule *wasm_module = cc->cur_wasm_module;
+    WASMFunctionImport *func_import;
+    WASMFunction *func;
+    WASMType *func_type;
+    JitFrame *jit_frame = cc->jit_frame;
+    JitReg native_ret;
+    JitReg fast_jit_func_ptrs, jitted_code = 0;
+    uint32 jitted_func_idx;
+
+    if (func_idx >= wasm_module->import_function_count) {
+        fast_jit_func_ptrs = get_fast_jit_func_ptrs_reg(jit_frame);
+        jitted_code = jit_cc_new_reg_ptr(cc);
+        /* jitted_code = func_ptrs[func_idx - import_function_count] */
+        jitted_func_idx = func_idx - wasm_module->import_function_count;
+        GEN_INSN(LDPTR, jitted_code, fast_jit_func_ptrs,
+                 NEW_CONST(I32, (uint32)sizeof(void *) * jitted_func_idx));
+    }
+
+    if (func_idx < wasm_module->import_function_count) {
+        func_import = &wasm_module->import_functions[func_idx].u.function;
+        func_type = func_import->func_type;
+    }
+    else {
+        func = wasm_module
+                   ->functions[func_idx - wasm_module->import_function_count];
+        func_type = func->func_type;
+    }
+
+    if (!pre_call(cc, func_type)) {
+        goto fail;
+    }
+
+    if (func_idx < wasm_module->import_function_count) {
+        JitReg arg_regs[3];
+
+        native_ret = jit_cc_new_reg_I32(cc);
+        arg_regs[0] = cc->exec_env_reg;
+        arg_regs[1] = NEW_CONST(I32, func_idx);
+        arg_regs[2] = cc->fp_reg;
+
+        if (!jit_emit_callnative(cc, jit_invoke_native, native_ret, arg_regs,
+                                 3)) {
+            return false;
+        }
+        /* Convert bool to uint32 */
+        GEN_INSN(AND, native_ret, native_ret, NEW_CONST(I32, 0xFF));
+
+        /* Check whether there is exception thrown */
+        GEN_INSN(CMP, cc->cmp_reg, native_ret, NEW_CONST(I32, 0));
+        if (!jit_emit_exception(cc, JIT_EXCE_ALREADY_THROWN, JIT_OP_BEQ,
+                                cc->cmp_reg, NULL)) {
+            return false;
+        }
+
+        if (!post_return(cc, func_type, 0)) {
+            goto fail;
+        }
+    }
+    else {
+        JitReg res = 0;
+
+        if (func_type->result_count > 0) {
+            switch (func_type->types[func_type->param_count]) {
+                case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+                case VALUE_TYPE_EXTERNREF:
+                case VALUE_TYPE_FUNCREF:
+#endif
+                    res = jit_cc_new_reg_I32(cc);
+                    break;
+                case VALUE_TYPE_I64:
+                    res = jit_cc_new_reg_I64(cc);
+                    break;
+                case VALUE_TYPE_F32:
+                    res = jit_cc_new_reg_F32(cc);
+                    break;
+                case VALUE_TYPE_F64:
+                    res = jit_cc_new_reg_F64(cc);
+                    break;
+                default:
+                    bh_assert(0);
+                    goto fail;
+            }
+        }
+
+        GEN_INSN(CALLBC, res, 0, jitted_code);
+
+        if (!post_return(cc, func_type, res)) {
+            goto fail;
+        }
+    }
+
+    /* Clear part of memory regs and table regs as their values
+       may be changed in the function call */
+    if (cc->cur_wasm_module->possible_memory_grow)
+        clear_memory_regs(jit_frame);
+    clear_table_regs(jit_frame);
+
+    /* Ignore tail call currently */
+    (void)tail_call;
+    return true;
+fail:
+    return false;
+}
+
+static JitReg
+pack_argv(JitCompContext *cc)
+{
+    /* reuse the stack of the next frame */
+    uint32 stack_base;
+    JitReg argv;
+
+    stack_base = cc->total_frame_size + offsetof(WASMInterpFrame, lp);
+    argv = jit_cc_new_reg_ptr(cc);
+    GEN_INSN(ADD, argv, cc->fp_reg, NEW_CONST(PTR, stack_base));
+    return argv;
+}
+
+static bool
+unpack_argv(JitCompContext *cc, const WASMType *func_type, JitReg argv)
+{
+    uint32 i, offset_by_cell = 0;
+    JitReg value;
+
+    /* push results in argv to stack */
+    for (i = 0; i < func_type->result_count; i++) {
+        switch (func_type->types[func_type->param_count + i]) {
+            case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+            case VALUE_TYPE_EXTERNREF:
+            case VALUE_TYPE_FUNCREF:
+#endif
+            {
+                value = jit_cc_new_reg_I32(cc);
+                GEN_INSN(LDI32, value, argv, NEW_CONST(I32, offset_by_cell));
+                PUSH_I32(value);
+                offset_by_cell += 4;
+                break;
+            }
+            case VALUE_TYPE_I64:
+            {
+                value = jit_cc_new_reg_I64(cc);
+                GEN_INSN(LDI64, value, argv, NEW_CONST(I32, offset_by_cell));
+                PUSH_I64(value);
+                offset_by_cell += 8;
+                break;
+            }
+            case VALUE_TYPE_F32:
+            {
+                value = jit_cc_new_reg_F32(cc);
+                GEN_INSN(LDF32, value, argv, NEW_CONST(I32, offset_by_cell));
+                PUSH_F32(value);
+                offset_by_cell += 4;
+                break;
+            }
+            case VALUE_TYPE_F64:
+            {
+                value = jit_cc_new_reg_F64(cc);
+                GEN_INSN(LDF64, value, argv, NEW_CONST(I32, offset_by_cell));
+                PUSH_F64(value);
+                offset_by_cell += 8;
+                break;
+            }
+            default:
+            {
+                bh_assert(0);
+                goto fail;
+            }
+        }
+    }
+
+    /* Update the committed_sp as the callee has updated the frame sp */
+    cc->jit_frame->committed_sp = cc->jit_frame->sp;
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_call_indirect(JitCompContext *cc, uint32 type_idx,
+                             uint32 tbl_idx)
+{
+    JitReg elem_idx, native_ret, argv, arg_regs[6];
+    WASMType *func_type;
+
+    POP_I32(elem_idx);
+
+    func_type = cc->cur_wasm_module->types[type_idx];
+    if (!pre_call(cc, func_type)) {
+        goto fail;
+    }
+
+    argv = pack_argv(cc);
+
+    native_ret = jit_cc_new_reg_I32(cc);
+    arg_regs[0] = cc->exec_env_reg;
+    arg_regs[1] = NEW_CONST(I32, tbl_idx);
+    arg_regs[2] = elem_idx;
+    arg_regs[3] = NEW_CONST(I32, type_idx);
+    arg_regs[4] = NEW_CONST(I32, func_type->param_cell_num);
+    arg_regs[5] = argv;
+
+    if (!jit_emit_callnative(cc, jit_call_indirect, native_ret, arg_regs, 6)) {
+        return false;
+    }
+    /* Convert bool to uint32 */
+    GEN_INSN(AND, native_ret, native_ret, NEW_CONST(I32, 0xFF));
+
+    /* Check whether there is exception thrown */
+    GEN_INSN(CMP, cc->cmp_reg, native_ret, NEW_CONST(I32, 0));
+    if (!jit_emit_exception(cc, JIT_EXCE_ALREADY_THROWN, JIT_OP_BEQ,
+                            cc->cmp_reg, NULL)) {
+        return false;
+    }
+
+    if (!unpack_argv(cc, func_type, argv)) {
+        goto fail;
+    }
+
+    /* Clear part of memory regs and table regs as their values
+       may be changed in the function call */
+    if (cc->cur_wasm_module->possible_memory_grow)
+        clear_memory_regs(cc->jit_frame);
+    clear_table_regs(cc->jit_frame);
+    return true;
+fail:
+    return false;
+}
+
+#if WASM_ENABLE_REF_TYPES != 0
+bool
+jit_compile_op_ref_null(JitCompContext *cc, uint32 ref_type)
+{
+    PUSH_I32(NEW_CONST(I32, NULL_REF));
+    (void)ref_type;
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_ref_is_null(JitCompContext *cc)
+{
+    JitReg ref, res;
+
+    POP_I32(ref);
+
+    GEN_INSN(CMP, cc->cmp_reg, ref, NEW_CONST(I32, NULL_REF));
+    res = jit_cc_new_reg_I32(cc);
+    GEN_INSN(SELECTEQ, res, cc->cmp_reg, NEW_CONST(I32, 1), NEW_CONST(I32, 0));
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_ref_func(JitCompContext *cc, uint32 func_idx)
+{
+    PUSH_I32(NEW_CONST(I32, func_idx));
+    return true;
+fail:
+    return false;
+}
+#endif
+
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+bool
+jit_emit_callnative(JitCompContext *cc, void *native_func, JitReg res,
+                    JitReg *params, uint32 param_count)
+{
+    JitInsn *insn;
+    char *i64_arg_names[] = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };
+    char *f32_arg_names[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" };
+    char *f64_arg_names[] = { "xmm0_f64", "xmm1_f64", "xmm2_f64",
+                              "xmm3_f64", "xmm4_f64", "xmm5_f64" };
+    JitReg i64_arg_regs[6], f32_arg_regs[6], f64_arg_regs[6], res_hreg = 0;
+    JitReg eax_hreg = jit_codegen_get_hreg_by_name("eax");
+    JitReg rax_hreg = jit_codegen_get_hreg_by_name("rax");
+    JitReg xmm0_hreg = jit_codegen_get_hreg_by_name("xmm0");
+    JitReg xmm0_f64_hreg = jit_codegen_get_hreg_by_name("xmm0_f64");
+    uint32 i, i64_reg_idx, float_reg_idx;
+
+    bh_assert(param_count <= 6);
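+
+    /* Argument registers follow the SysV AMD64 calling convention:
+       integer/pointer args in rdi/rsi/rdx/rcx/r8/r9 and floating-point
+       args in xmm0-xmm5, hence the six-parameter limit asserted above */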
+
+    for (i = 0; i < 6; i++) {
+        i64_arg_regs[i] = jit_codegen_get_hreg_by_name(i64_arg_names[i]);
+        f32_arg_regs[i] = jit_codegen_get_hreg_by_name(f32_arg_names[i]);
+        f64_arg_regs[i] = jit_codegen_get_hreg_by_name(f64_arg_names[i]);
+    }
+
+    i64_reg_idx = float_reg_idx = 0;
+    for (i = 0; i < param_count; i++) {
+        switch (jit_reg_kind(params[i])) {
+            case JIT_REG_KIND_I32:
+                GEN_INSN(I32TOI64, i64_arg_regs[i64_reg_idx++], params[i]);
+                break;
+            case JIT_REG_KIND_I64:
+                GEN_INSN(MOV, i64_arg_regs[i64_reg_idx++], params[i]);
+                break;
+            case JIT_REG_KIND_F32:
+                GEN_INSN(MOV, f32_arg_regs[float_reg_idx++], params[i]);
+                break;
+            case JIT_REG_KIND_F64:
+                GEN_INSN(MOV, f64_arg_regs[float_reg_idx++], params[i]);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+
+    if (res) {
+        switch (jit_reg_kind(res)) {
+            case JIT_REG_KIND_I32:
+                res_hreg = eax_hreg;
+                break;
+            case JIT_REG_KIND_I64:
+                res_hreg = rax_hreg;
+                break;
+            case JIT_REG_KIND_F32:
+                res_hreg = xmm0_hreg;
+                break;
+            case JIT_REG_KIND_F64:
+                res_hreg = xmm0_f64_hreg;
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+
+    insn = GEN_INSN(CALLNATIVE, res_hreg,
+                    NEW_CONST(PTR, (uintptr_t)native_func), param_count);
+    if (!insn) {
+        return false;
+    }
+
+    i64_reg_idx = float_reg_idx = 0;
+    for (i = 0; i < param_count; i++) {
+        switch (jit_reg_kind(params[i])) {
+            case JIT_REG_KIND_I32:
+            case JIT_REG_KIND_I64:
+                *(jit_insn_opndv(insn, i + 2)) = i64_arg_regs[i64_reg_idx++];
+                break;
+            case JIT_REG_KIND_F32:
+                *(jit_insn_opndv(insn, i + 2)) = f32_arg_regs[float_reg_idx++];
+                break;
+            case JIT_REG_KIND_F64:
+                *(jit_insn_opndv(insn, i + 2)) = f64_arg_regs[float_reg_idx++];
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+
+    if (res) {
+        GEN_INSN(MOV, res, res_hreg);
+    }
+
+    return true;
+}
+#else
+bool
+jit_emit_callnative(JitCompContext *cc, void *native_func, JitReg res,
+                    JitReg *params, uint32 param_count)
+{
+    JitInsn *insn;
+    uint32 i;
+
+    bh_assert(param_count <= 6);
+
+    insn = GEN_INSN(CALLNATIVE, res, NEW_CONST(PTR, (uintptr_t)native_func),
+                    param_count);
+    if (!insn)
+        return false;
+
+    for (i = 0; i < param_count; i++) {
+        *(jit_insn_opndv(insn, i + 2)) = params[i];
+    }
+    return true;
+}
+#endif
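
Note: the register names above follow the System V AMD64 calling convention,
where the first six integer arguments travel in rdi, rsi, rdx, rcx, r8 and r9,
floating-point arguments in xmm0-xmm5, and the result comes back in eax/rax or
xmm0. A minimal usage sketch, mirroring the memory.grow handler that appears
later in this patch (all names come from the surrounding code):

    /* Call "bool wasm_enlarge_memory(inst, inc_page_count)" and keep the
       i32 result; the helper routes args[0] to rdi and args[1] to esi. */
    JitReg args[2], inc_page_count, grow_res;
    POP_I32(inc_page_count);
    grow_res = jit_cc_new_reg_I32(cc);
    args[0] = get_module_inst_reg(cc->jit_frame);
    args[1] = inc_page_count;
    if (!jit_emit_callnative(cc, wasm_enlarge_memory, grow_res, args, 2))
        goto fail;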

+ 39 - 0
core/iwasm/fast-jit/fe/jit_emit_function.h

@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_FUNCTION_H_
+#define _JIT_EMIT_FUNCTION_H_
+
+#include "../jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call);
+
+bool
+jit_compile_op_call_indirect(JitCompContext *cc, uint32 type_idx,
+                             uint32 tbl_idx);
+
+bool
+jit_compile_op_ref_null(JitCompContext *cc, uint32 ref_type);
+
+bool
+jit_compile_op_ref_is_null(JitCompContext *cc);
+
+bool
+jit_compile_op_ref_func(JitCompContext *cc, uint32 func_idx);
+
+bool
+jit_emit_callnative(JitCompContext *cc, void *native_func, JitReg res,
+                    JitReg *params, uint32 param_count);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_FUNCTION_H_ */

+ 782 - 0
core/iwasm/fast-jit/fe/jit_emit_memory.c

@@ -0,0 +1,782 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_memory.h"
+#include "jit_emit_exception.h"
+#include "jit_emit_function.h"
+#include "../jit_frontend.h"
+#include "../jit_codegen.h"
+#include "../../interpreter/wasm_runtime.h"
+
+#ifndef OS_ENABLE_HW_BOUND_CHECK
+static JitReg
+get_memory_boundary(JitCompContext *cc, uint32 mem_idx, uint32 bytes)
+{
+    JitReg memory_boundary;
+
+    switch (bytes) {
+        case 1:
+        {
+            memory_boundary =
+                get_mem_bound_check_1byte_reg(cc->jit_frame, mem_idx);
+            break;
+        }
+        case 2:
+        {
+            memory_boundary =
+                get_mem_bound_check_2bytes_reg(cc->jit_frame, mem_idx);
+            break;
+        }
+        case 4:
+        {
+            memory_boundary =
+                get_mem_bound_check_4bytes_reg(cc->jit_frame, mem_idx);
+            break;
+        }
+        case 8:
+        {
+            memory_boundary =
+                get_mem_bound_check_8bytes_reg(cc->jit_frame, mem_idx);
+            break;
+        }
+        case 16:
+        {
+            memory_boundary =
+                get_mem_bound_check_16bytes_reg(cc->jit_frame, mem_idx);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return memory_boundary;
+fail:
+    return 0;
+}
+#endif
+
+#if UINTPTR_MAX == UINT64_MAX
+static JitReg
+check_and_seek_on_64bit_platform(JitCompContext *cc, JitReg addr, JitReg offset,
+                                 JitReg memory_boundary)
+{
+    JitReg long_addr, offset1;
+
+    /* long_addr = (int64_t)addr */
+    long_addr = jit_cc_new_reg_I64(cc);
+    GEN_INSN(U32TOI64, long_addr, addr);
+
+    /* offset1 = offset + long_addr */
+    offset1 = jit_cc_new_reg_I64(cc);
+    GEN_INSN(ADD, offset1, offset, long_addr);
+
+#ifndef OS_ENABLE_HW_BOUND_CHECK
+    /* if (offset1 > memory_boundary) goto EXCEPTION */
+    GEN_INSN(CMP, cc->cmp_reg, offset1, memory_boundary);
+    if (!jit_emit_exception(cc, JIT_EXCE_OUT_OF_BOUNDS_MEMORY_ACCESS,
+                            JIT_OP_BGTU, cc->cmp_reg, NULL)) {
+        goto fail;
+    }
+#endif
+
+    return offset1;
+#ifndef OS_ENABLE_HW_BOUND_CHECK
+fail:
+    return 0;
+#endif
+}
+#else
+static JitReg
+check_and_seek_on_32bit_platform(JitCompContext *cc, JitReg addr, JitReg offset,
+                                 JitReg memory_boundary)
+{
+    JitReg offset1;
+
+    /* offset1 = offset + addr */
+    offset1 = jit_cc_new_reg_I32(cc);
+    GEN_INSN(ADD, offset1, offset, addr);
+
+    /* if (offset1 < addr) goto EXCEPTION */
+    GEN_INSN(CMP, cc->cmp_reg, offset1, addr);
+    if (!jit_emit_exception(cc, JIT_EXCE_OUT_OF_BOUNDS_MEMORY_ACCESS,
+                            JIT_OP_BLTU, cc->cmp_reg, NULL)) {
+        goto fail;
+    }
+
+#ifndef OS_ENABLE_HW_BOUND_CHECK
+    /* if (offset1 > memory_boundary) goto EXCEPTION */
+    GEN_INSN(CMP, cc->cmp_reg, offset1, memory_boundary);
+    if (!jit_emit_exception(cc, JIT_EXCE_OUT_OF_BOUNDS_MEMORY_ACCESS,
+                            JIT_OP_BGTU, cc->cmp_reg, NULL)) {
+        goto fail;
+    }
+#endif
+
+    return offset1;
+fail:
+    return 0;
+}
+#endif
+
+static JitReg
+check_and_seek(JitCompContext *cc, JitReg addr, uint32 offset, uint32 bytes)
+{
+    JitReg memory_boundary = 0, offset1;
+#ifndef OS_ENABLE_HW_BOUND_CHECK
+    /* the default memory */
+    uint32 mem_idx = 0;
+#endif
+
+#ifndef OS_ENABLE_HW_BOUND_CHECK
+    /* ---------- check ---------- */
+    /* 1. shortcut if the memory size is 0 */
+    if (0 == cc->cur_wasm_module->memories[mem_idx].init_page_count) {
+        JitReg memory_inst, cur_mem_page_count;
+
+        /* if (cur_mem_page_count == 0) goto EXCEPTION */
+        memory_inst = get_memory_inst_reg(cc->jit_frame, mem_idx);
+        cur_mem_page_count = jit_cc_new_reg_I32(cc);
+        GEN_INSN(LDI32, cur_mem_page_count, memory_inst,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance, cur_page_count)));
+        GEN_INSN(CMP, cc->cmp_reg, cur_mem_page_count, NEW_CONST(I32, 0));
+        if (!jit_emit_exception(cc, JIT_EXCE_OUT_OF_BOUNDS_MEMORY_ACCESS,
+                                JIT_OP_BEQ, cc->cmp_reg, NULL)) {
+            goto fail;
+        }
+    }
+
+    /* 2. a complete boundary check */
+    memory_boundary = get_memory_boundary(cc, mem_idx, bytes);
+    if (!memory_boundary)
+        goto fail;
+#endif
+
+#if UINTPTR_MAX == UINT64_MAX
+    offset1 = check_and_seek_on_64bit_platform(cc, addr, NEW_CONST(I64, offset),
+                                               memory_boundary);
+    if (!offset1)
+        goto fail;
+#else
+    offset1 = check_and_seek_on_32bit_platform(cc, addr, NEW_CONST(I32, offset),
+                                               memory_boundary);
+    if (!offset1)
+        goto fail;
+#endif
+
+    return offset1;
+fail:
+    return 0;
+}
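
Taken together, the emitted checks amount to the classic linear-memory bounds
test sketched below. The exact content of the per-size boundary register is an
assumption based on its name (mem_bound_check_Nbytes, presumably the linear
memory size minus N), and trap() stands in for the exception branch:

    /* Illustrative C equivalent of the emitted IR, 64-bit platform case */
    uint64 offset1 = (uint64)addr + (uint64)offset;  /* U32TOI64 + ADD */
    if (offset1 > linear_mem_size - bytes)           /* CMP + BGTU     */
        trap("out of bounds memory access");
    /* ... the access then reads/writes memory_data + offset1 */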
+
+bool
+jit_compile_op_i32_load(JitCompContext *cc, uint32 align, uint32 offset,
+                        uint32 bytes, bool sign, bool atomic)
+{
+    JitReg addr, offset1, value, memory_data;
+
+    POP_I32(addr);
+
+    offset1 = check_and_seek(cc, addr, offset, bytes);
+    if (!offset1) {
+        goto fail;
+    }
+
+    memory_data = get_memory_data_reg(cc->jit_frame, 0);
+
+    value = jit_cc_new_reg_I32(cc);
+    switch (bytes) {
+        case 1:
+        {
+            if (sign) {
+                GEN_INSN(LDI8, value, memory_data, offset1);
+            }
+            else {
+                GEN_INSN(LDU8, value, memory_data, offset1);
+            }
+            break;
+        }
+        case 2:
+        {
+            if (sign) {
+                GEN_INSN(LDI16, value, memory_data, offset1);
+            }
+            else {
+                GEN_INSN(LDU16, value, memory_data, offset1);
+            }
+            break;
+        }
+        case 4:
+        {
+            if (sign) {
+                GEN_INSN(LDI32, value, memory_data, offset1);
+            }
+            else {
+                GEN_INSN(LDU32, value, memory_data, offset1);
+            }
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    PUSH_I32(value);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_load(JitCompContext *cc, uint32 align, uint32 offset,
+                        uint32 bytes, bool sign, bool atomic)
+{
+    JitReg addr, offset1, value, memory_data;
+
+    POP_I32(addr);
+
+    offset1 = check_and_seek(cc, addr, offset, bytes);
+    if (!offset1) {
+        goto fail;
+    }
+
+    memory_data = get_memory_data_reg(cc->jit_frame, 0);
+
+    value = jit_cc_new_reg_I64(cc);
+    switch (bytes) {
+        case 1:
+        {
+            if (sign) {
+                GEN_INSN(LDI8, value, memory_data, offset1);
+            }
+            else {
+                GEN_INSN(LDU8, value, memory_data, offset1);
+            }
+            break;
+        }
+        case 2:
+        {
+            if (sign) {
+                GEN_INSN(LDI16, value, memory_data, offset1);
+            }
+            else {
+                GEN_INSN(LDU16, value, memory_data, offset1);
+            }
+            break;
+        }
+        case 4:
+        {
+            if (sign) {
+                GEN_INSN(LDI32, value, memory_data, offset1);
+            }
+            else {
+                GEN_INSN(LDU32, value, memory_data, offset1);
+            }
+            break;
+        }
+        case 8:
+        {
+            if (sign) {
+                GEN_INSN(LDI64, value, memory_data, offset1);
+            }
+            else {
+                GEN_INSN(LDU64, value, memory_data, offset1);
+            }
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    PUSH_I64(value);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f32_load(JitCompContext *cc, uint32 align, uint32 offset)
+{
+    JitReg addr, offset1, value, memory_data;
+
+    POP_I32(addr);
+
+    offset1 = check_and_seek(cc, addr, offset, 4);
+    if (!offset1) {
+        goto fail;
+    }
+
+    memory_data = get_memory_data_reg(cc->jit_frame, 0);
+
+    value = jit_cc_new_reg_F32(cc);
+    GEN_INSN(LDF32, value, memory_data, offset1);
+
+    PUSH_F32(value);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f64_load(JitCompContext *cc, uint32 align, uint32 offset)
+{
+    JitReg addr, offset1, value, memory_data;
+
+    POP_I32(addr);
+
+    offset1 = check_and_seek(cc, addr, offset, 8);
+    if (!offset1) {
+        goto fail;
+    }
+
+    memory_data = get_memory_data_reg(cc->jit_frame, 0);
+
+    value = jit_cc_new_reg_F64(cc);
+    GEN_INSN(LDF64, value, memory_data, offset1);
+
+    PUSH_F64(value);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_store(JitCompContext *cc, uint32 align, uint32 offset,
+                         uint32 bytes, bool atomic)
+{
+    JitReg value, addr, offset1, memory_data;
+
+    POP_I32(value);
+    POP_I32(addr);
+
+    offset1 = check_and_seek(cc, addr, offset, bytes);
+    if (!offset1) {
+        goto fail;
+    }
+
+    memory_data = get_memory_data_reg(cc->jit_frame, 0);
+
+    switch (bytes) {
+        case 1:
+        {
+            GEN_INSN(STI8, value, memory_data, offset1);
+            break;
+        }
+        case 2:
+        {
+            GEN_INSN(STI16, value, memory_data, offset1);
+            break;
+        }
+        case 4:
+        {
+            GEN_INSN(STI32, value, memory_data, offset1);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_store(JitCompContext *cc, uint32 align, uint32 offset,
+                         uint32 bytes, bool atomic)
+{
+    JitReg value, addr, offset1, memory_data;
+
+    POP_I64(value);
+    POP_I32(addr);
+
+    offset1 = check_and_seek(cc, addr, offset, bytes);
+    if (!offset1) {
+        goto fail;
+    }
+
+    if (jit_reg_is_const(value) && bytes < 8) {
+        value = NEW_CONST(I32, (int32)jit_cc_get_const_I64(cc, value));
+    }
+
+    memory_data = get_memory_data_reg(cc->jit_frame, 0);
+
+    switch (bytes) {
+        case 1:
+        {
+            GEN_INSN(STI8, value, memory_data, offset1);
+            break;
+        }
+        case 2:
+        {
+            GEN_INSN(STI16, value, memory_data, offset1);
+            break;
+        }
+        case 4:
+        {
+            GEN_INSN(STI32, value, memory_data, offset1);
+            break;
+        }
+        case 8:
+        {
+            GEN_INSN(STI64, value, memory_data, offset1);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f32_store(JitCompContext *cc, uint32 align, uint32 offset)
+{
+    JitReg value, addr, offset1, memory_data;
+
+    POP_F32(value);
+    POP_I32(addr);
+
+    offset1 = check_and_seek(cc, addr, offset, 4);
+    if (!offset1) {
+        goto fail;
+    }
+
+    memory_data = get_memory_data_reg(cc->jit_frame, 0);
+
+    GEN_INSN(STF32, value, memory_data, offset1);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f64_store(JitCompContext *cc, uint32 align, uint32 offset)
+{
+    JitReg value, addr, offset1, memory_data;
+
+    POP_F64(value);
+    POP_I32(addr);
+
+    offset1 = check_and_seek(cc, addr, offset, 8);
+    if (!offset1) {
+        goto fail;
+    }
+
+    memory_data = get_memory_data_reg(cc->jit_frame, 0);
+
+    GEN_INSN(STF64, value, memory_data, offset1);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_memory_size(JitCompContext *cc, uint32 mem_idx)
+{
+    JitReg mem_inst, res;
+
+    mem_inst = get_memory_inst_reg(cc->jit_frame, mem_idx);
+
+    res = jit_cc_new_reg_I32(cc);
+    GEN_INSN(LDI32, res, mem_inst,
+             NEW_CONST(I32, offsetof(WASMMemoryInstance, cur_page_count)));
+
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_memory_grow(JitCompContext *cc, uint32 mem_idx)
+{
+    JitReg memory_inst, grow_res, res;
+    JitReg prev_page_count, inc_page_count, args[2];
+
+    /* Get current page count */
+    memory_inst = get_memory_inst_reg(cc->jit_frame, mem_idx);
+    prev_page_count = jit_cc_new_reg_I32(cc);
+    GEN_INSN(LDI32, prev_page_count, memory_inst,
+             NEW_CONST(I32, offsetof(WASMMemoryInstance, cur_page_count)));
+
+    /* Call wasm_enlarge_memory */
+    POP_I32(inc_page_count);
+
+    grow_res = jit_cc_new_reg_I32(cc);
+    args[0] = get_module_inst_reg(cc->jit_frame);
+    args[1] = inc_page_count;
+
+    if (!jit_emit_callnative(cc, wasm_enlarge_memory, grow_res, args, 2)) {
+        goto fail;
+    }
+    /* Convert bool to uint32 */
+    GEN_INSN(AND, grow_res, grow_res, NEW_CONST(I32, 0xFF));
+
+    /* Return the previous page count on success, or -1 on failure */
+    res = jit_cc_new_reg_I32(cc);
+    GEN_INSN(CMP, cc->cmp_reg, grow_res, NEW_CONST(I32, 0));
+    GEN_INSN(SELECTNE, res, cc->cmp_reg, prev_page_count,
+             NEW_CONST(I32, (int32)-1));
+    PUSH_I32(res);
+
+    /* Ensure the memory related registers are refreshed on next access */
+    clear_memory_regs(cc->jit_frame);
+
+    return true;
+fail:
+    return false;
+}
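
The CMP/SELECTNE pair above implements the Wasm rule that memory.grow returns
the previous size in pages on success and -1 on failure; in plain C:

    /* res is what gets pushed onto the value stack */
    int32 res = (grow_res != 0) ? prev_page_count : -1;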
+
+#if WASM_ENABLE_BULK_MEMORY != 0
+static int
+wasm_init_memory(WASMModuleInstance *inst, uint32 mem_idx, uint32 seg_idx,
+                 uint32 len, uint32 mem_offset, uint32 data_offset)
+{
+    WASMMemoryInstance *mem_inst;
+    WASMDataSeg *data_segment;
+    uint32 mem_size;
+    uint8 *mem_addr, *data_addr;
+
+    /* if d + n > the length of mem.data */
+    mem_inst = inst->memories[mem_idx];
+    mem_size = mem_inst->cur_page_count * mem_inst->num_bytes_per_page;
+    if (mem_size < mem_offset || mem_size - mem_offset < len)
+        goto out_of_bounds;
+
+    /* if s + n > the length of data.data */
+    bh_assert(seg_idx < inst->module->data_seg_count);
+    data_segment = inst->module->data_segments[seg_idx];
+    if (data_segment->data_length < data_offset
+        || data_segment->data_length - data_offset < len)
+        goto out_of_bounds;
+
+    mem_addr = mem_inst->memory_data + mem_offset;
+    data_addr = data_segment->data + data_offset;
+    bh_memcpy_s(mem_addr, mem_size - mem_offset, data_addr, len);
+
+    return 0;
+out_of_bounds:
+    wasm_set_exception(inst, "out of bounds memory access");
+    return -1;
+}
+
+bool
+jit_compile_op_memory_init(JitCompContext *cc, uint32 mem_idx, uint32 seg_idx)
+{
+    JitReg len, mem_offset, data_offset, res;
+    JitReg args[6] = { 0 };
+
+    POP_I32(len);
+    POP_I32(data_offset);
+    POP_I32(mem_offset);
+
+    res = jit_cc_new_reg_I32(cc);
+    args[0] = get_module_inst_reg(cc->jit_frame);
+    args[1] = NEW_CONST(I32, mem_idx);
+    args[2] = NEW_CONST(I32, seg_idx);
+    args[3] = len;
+    args[4] = mem_offset;
+    args[5] = data_offset;
+
+    if (!jit_emit_callnative(cc, wasm_init_memory, res, args,
+                             sizeof(args) / sizeof(args[0])))
+        goto fail;
+
+    GEN_INSN(CMP, cc->cmp_reg, res, NEW_CONST(I32, 0));
+    if (!jit_emit_exception(cc, JIT_EXCE_ALREADY_THROWN, JIT_OP_BLTS,
+                            cc->cmp_reg, NULL))
+        goto fail;
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_data_drop(JitCompContext *cc, uint32 seg_idx)
+{
+    JitReg module = get_module_reg(cc->jit_frame);
+    JitReg data_segments = jit_cc_new_reg_ptr(cc);
+    JitReg data_segment = jit_cc_new_reg_ptr(cc);
+
+    GEN_INSN(LDPTR, data_segments, module,
+             NEW_CONST(I32, offsetof(WASMModule, data_segments)));
+    GEN_INSN(LDPTR, data_segment, data_segments,
+             NEW_CONST(I32, seg_idx * sizeof(WASMDataSeg *)));
+    GEN_INSN(STI32, NEW_CONST(I32, 0), data_segment,
+             NEW_CONST(I32, offsetof(WASMDataSeg, data_length)));
+
+    return true;
+}
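
The LDPTR/LDPTR/STI32 sequence above is simply the address computation for
zeroing the segment length; the C equivalent is:

    /* module is the WASMModule loaded by get_module_reg() */
    module->data_segments[seg_idx]->data_length = 0;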
+
+static int
+wasm_copy_memory(WASMModuleInstance *inst, uint32 src_mem_idx,
+                 uint32 dst_mem_idx, uint32 len, uint32 src_offset,
+                 uint32 dst_offset)
+{
+    WASMMemoryInstance *src_mem, *dst_mem;
+    uint32 src_mem_size, dst_mem_size;
+    uint8 *src_addr, *dst_addr;
+
+    src_mem = inst->memories[src_mem_idx];
+    dst_mem = inst->memories[dst_mem_idx];
+    src_mem_size = src_mem->cur_page_count * src_mem->num_bytes_per_page;
+    dst_mem_size = dst_mem->cur_page_count * dst_mem->num_bytes_per_page;
+
+    /* if s + n > the length of mem.data */
+    if (src_mem_size < src_offset || src_mem_size - src_offset < len)
+        goto out_of_bounds;
+
+    /* if d + n > the length of mem.data */
+    if (dst_mem_size < dst_offset || dst_mem_size - dst_offset < len)
+        goto out_of_bounds;
+
+    src_addr = src_mem->memory_data + src_offset;
+    dst_addr = dst_mem->memory_data + dst_offset;
+    /* allowing the destination and source to overlap */
+    bh_memmove_s(dst_addr, dst_mem_size - dst_offset, src_addr, len);
+
+    return 0;
+out_of_bounds:
+    wasm_set_exception(inst, "out of bounds memory access");
+    return -1;
+}
+
+bool
+jit_compile_op_memory_copy(JitCompContext *cc, uint32 src_mem_idx,
+                           uint32 dst_mem_idx)
+{
+    JitReg len, src, dst, res;
+    JitReg args[6] = { 0 };
+
+    POP_I32(len);
+    POP_I32(src);
+    POP_I32(dst);
+
+    res = jit_cc_new_reg_I32(cc);
+    args[0] = get_module_inst_reg(cc->jit_frame);
+    args[1] = NEW_CONST(I32, src_mem_idx);
+    args[2] = NEW_CONST(I32, dst_mem_idx);
+    args[3] = len;
+    args[4] = src;
+    args[5] = dst;
+
+    if (!jit_emit_callnative(cc, wasm_copy_memory, res, args,
+                             sizeof(args) / sizeof(args[0])))
+        goto fail;
+
+    GEN_INSN(CMP, cc->cmp_reg, res, NEW_CONST(I32, 0));
+    if (!jit_emit_exception(cc, JIT_EXCE_ALREADY_THROWN, JIT_OP_BLTS,
+                            cc->cmp_reg, NULL))
+        goto fail;
+
+    return true;
+fail:
+    return false;
+}
+
+static int
+wasm_fill_memory(WASMModuleInstance *inst, uint32 mem_idx, uint32 len,
+                 uint32 val, uint32 dst)
+{
+    WASMMemoryInstance *mem_inst;
+    uint32 mem_size;
+    uint8 *dst_addr;
+
+    mem_inst = inst->memories[mem_idx];
+    mem_size = mem_inst->cur_page_count * mem_inst->num_bytes_per_page;
+
+    if (mem_size < dst || mem_size - dst < len)
+        goto out_of_bounds;
+
+    dst_addr = mem_inst->memory_data + dst;
+    memset(dst_addr, val, len);
+
+    return 0;
+out_of_bounds:
+    wasm_set_exception(inst, "out of bounds memory access");
+    return -1;
+}
+
+bool
+jit_compile_op_memory_fill(JitCompContext *cc, uint32 mem_idx)
+{
+    JitReg res, len, val, dst;
+    JitReg args[5] = { 0 };
+
+    POP_I32(len);
+    POP_I32(val);
+    POP_I32(dst);
+
+    res = jit_cc_new_reg_I32(cc);
+    args[0] = get_module_inst_reg(cc->jit_frame);
+    args[1] = NEW_CONST(I32, mem_idx);
+    args[2] = len;
+    args[3] = val;
+    args[4] = dst;
+
+    if (!jit_emit_callnative(cc, wasm_fill_memory, res, args,
+                             sizeof(args) / sizeof(args[0])))
+        goto fail;
+
+    GEN_INSN(CMP, cc->cmp_reg, res, NEW_CONST(I32, 0));
+    if (!jit_emit_exception(cc, JIT_EXCE_ALREADY_THROWN, JIT_OP_BLTS,
+                            cc->cmp_reg, NULL))
+        goto fail;
+
+    return true;
+fail:
+    return false;
+}
+#endif
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+bool
+jit_compile_op_atomic_rmw(JitCompContext *cc, uint8 atomic_op, uint8 op_type,
+                          uint32 align, uint32 offset, uint32 bytes)
+{
+    return false;
+}
+
+bool
+jit_compile_op_atomic_cmpxchg(JitCompContext *cc, uint8 op_type, uint32 align,
+                              uint32 offset, uint32 bytes)
+{
+    return false;
+}
+
+bool
+jit_compile_op_atomic_wait(JitCompContext *cc, uint8 op_type, uint32 align,
+                           uint32 offset, uint32 bytes)
+{
+    return false;
+}
+
+bool
+jit_compiler_op_atomic_notify(JitCompContext *cc, uint32 align, uint32 offset,
+                              uint32 bytes)
+{
+    return false;
+}
+#endif

+ 89 - 0
core/iwasm/fast-jit/fe/jit_emit_memory.h

@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_MEMORY_H_
+#define _JIT_EMIT_MEMORY_H_
+
+#include "../jit_compiler.h"
+#if WASM_ENABLE_SHARED_MEMORY != 0
+#include "../../common/wasm_shared_memory.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_compile_op_i32_load(JitCompContext *cc, uint32 align, uint32 offset,
+                        uint32 bytes, bool sign, bool atomic);
+
+bool
+jit_compile_op_i64_load(JitCompContext *cc, uint32 align, uint32 offset,
+                        uint32 bytes, bool sign, bool atomic);
+
+bool
+jit_compile_op_f32_load(JitCompContext *cc, uint32 align, uint32 offset);
+
+bool
+jit_compile_op_f64_load(JitCompContext *cc, uint32 align, uint32 offset);
+
+bool
+jit_compile_op_i32_store(JitCompContext *cc, uint32 align, uint32 offset,
+                         uint32 bytes, bool atomic);
+
+bool
+jit_compile_op_i64_store(JitCompContext *cc, uint32 align, uint32 offset,
+                         uint32 bytes, bool atomic);
+
+bool
+jit_compile_op_f32_store(JitCompContext *cc, uint32 align, uint32 offset);
+
+bool
+jit_compile_op_f64_store(JitCompContext *cc, uint32 align, uint32 offset);
+
+bool
+jit_compile_op_memory_size(JitCompContext *cc, uint32 mem_idx);
+
+bool
+jit_compile_op_memory_grow(JitCompContext *cc, uint32 mem_idx);
+
+#if WASM_ENABLE_BULK_MEMORY != 0
+bool
+jit_compile_op_memory_init(JitCompContext *cc, uint32 mem_idx, uint32 seg_idx);
+
+bool
+jit_compile_op_data_drop(JitCompContext *cc, uint32 seg_idx);
+
+bool
+jit_compile_op_memory_copy(JitCompContext *cc, uint32 src_mem_idx,
+                           uint32 dst_mem_idx);
+
+bool
+jit_compile_op_memory_fill(JitCompContext *cc, uint32 mem_idx);
+#endif
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+bool
+jit_compile_op_atomic_rmw(JitCompContext *cc, uint8 atomic_op, uint8 op_type,
+                          uint32 align, uint32 offset, uint32 bytes);
+
+bool
+jit_compile_op_atomic_cmpxchg(JitCompContext *cc, uint8 op_type, uint32 align,
+                              uint32 offset, uint32 bytes);
+
+bool
+jit_compile_op_atomic_wait(JitCompContext *cc, uint8 op_type, uint32 align,
+                           uint32 offset, uint32 bytes);
+
+bool
+jit_compiler_op_atomic_notify(JitCompContext *cc, uint32 align, uint32 offset,
+                              uint32 bytes);
+#endif
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_MEMORY_H_ */

+ 1651 - 0
core/iwasm/fast-jit/fe/jit_emit_numberic.c

@@ -0,0 +1,1651 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_numberic.h"
+#include "jit_emit_exception.h"
+#include "jit_emit_control.h"
+#include "jit_emit_function.h"
+#include "../jit_frontend.h"
+#include "../jit_codegen.h"
+
+#define PUSH_INT(v)      \
+    do {                 \
+        if (is_i32)      \
+            PUSH_I32(v); \
+        else             \
+            PUSH_I64(v); \
+    } while (0)
+
+#define POP_INT(v)      \
+    do {                \
+        if (is_i32)     \
+            POP_I32(v); \
+        else            \
+            POP_I64(v); \
+    } while (0)
+
+#define PUSH_FLOAT(v)    \
+    do {                 \
+        if (is_f32)      \
+            PUSH_F32(v); \
+        else             \
+            PUSH_F64(v); \
+    } while (0)
+
+#define POP_FLOAT(v)    \
+    do {                \
+        if (is_f32)     \
+            POP_F32(v); \
+        else            \
+            POP_F64(v); \
+    } while (0)
+
+#define DEF_INT_UNARY_OP(op, err)            \
+    do {                                     \
+        JitReg res, operand;                 \
+        POP_INT(operand);                    \
+        if (!(res = op)) {                   \
+            if (err)                         \
+                jit_set_last_error(cc, err); \
+            goto fail;                       \
+        }                                    \
+        PUSH_INT(res);                       \
+    } while (0)
+
+#define DEF_INT_BINARY_OP(op, err)           \
+    do {                                     \
+        JitReg res, left, right;             \
+        POP_INT(right);                      \
+        POP_INT(left);                       \
+        if (!(res = op)) {                   \
+            if (err)                         \
+                jit_set_last_error(cc, err); \
+            goto fail;                       \
+        }                                    \
+        PUSH_INT(res);                       \
+    } while (0)
+
+#define DEF_FP_UNARY_OP(op, err)             \
+    do {                                     \
+        JitReg res, operand;                 \
+        POP_FLOAT(operand);                  \
+        if (!(res = op)) {                   \
+            if (err)                         \
+                jit_set_last_error(cc, err); \
+            goto fail;                       \
+        }                                    \
+        PUSH_FLOAT(res);                     \
+    } while (0)
+
+#define DEF_FP_BINARY_OP(op, err)            \
+    do {                                     \
+        JitReg res, left, right;             \
+        POP_FLOAT(right);                    \
+        POP_FLOAT(left);                     \
+        if (!(res = op)) {                   \
+            if (err)                         \
+                jit_set_last_error(cc, err); \
+            goto fail;                       \
+        }                                    \
+        PUSH_FLOAT(res);                     \
+    } while (0)
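
Each DEF_*_OP macro expands inline inside a compile handler that provides a
fail: label. For example, DEF_INT_BINARY_OP(compile_int_add(cc, left, right,
is_i32), "compile int add fail."), used further below, expands roughly to:

    JitReg res, left, right;
    POP_INT(right);  /* operands come off the value stack right-first */
    POP_INT(left);
    if (!(res = compile_int_add(cc, left, right, is_i32))) {
        jit_set_last_error(cc, "compile int add fail.");
        goto fail;
    }
    PUSH_INT(res);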
+
+static uint32
+clz32(uint32 val)
+{
+    uint32 num = 0;
+    if (val == 0)
+        return 32;
+    while (!(val & 0x80000000)) {
+        num++;
+        val <<= 1;
+    }
+    return num;
+}
+
+static uint64
+clz64(uint64 val)
+{
+    uint32 num = 0;
+    if (val == 0)
+        return 64;
+    while (!(val & 0x8000000000000000ULL)) {
+        num++;
+        val <<= 1;
+    }
+    return num;
+}
+
+static uint32
+ctz32(uint32 val)
+{
+    uint32 num = 0;
+    if (val == 0)
+        return 32;
+    while (!(val & 1)) {
+        num++;
+        val >>= 1;
+    }
+    return num;
+}
+
+static uint64
+ctz64(uint64 val)
+{
+    uint32 num = 0;
+    if (val == 0)
+        return 64;
+    while (!(val & 1)) {
+        num++;
+        val >>= 1;
+    }
+    return num;
+}
+
+static uint32
+popcnt32(uint32 u)
+{
+    uint32 ret = 0;
+    while (u) {
+        u = (u & (u - 1));
+        ret++;
+    }
+    return ret;
+}
+
+static uint64
+popcnt64(uint64 u)
+{
+    uint32 ret = 0;
+    while (u) {
+        u = (u & (u - 1));
+        ret++;
+    }
+    return ret;
+}
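
These portable helpers only serve compile-time constant folding. A few sanity
values, assuming <assert.h> (easy to verify by hand):

    assert(clz32(0x00800000) == 8);  /* highest set bit is bit 23       */
    assert(ctz32(0x00000008) == 3);  /* lowest set bit is bit 3         */
    assert(popcnt32(0xF0F0) == 8);   /* eight bits set                  */
    assert(clz32(0) == 32 && ctz32(0) == 32);  /* zero is special-cased */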
+
+bool
+jit_compile_op_i32_clz(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_I32(value);
+    if (jit_reg_is_const(value)) {
+        uint32 i32 = jit_cc_get_const_I32(cc, value);
+        PUSH_I32(NEW_CONST(I32, clz32(i32)));
+        return true;
+    }
+
+    res = jit_cc_new_reg_I32(cc);
+    GEN_INSN(CLZ, res, value);
+    PUSH_I32(res);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_ctz(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_I32(value);
+    if (jit_reg_is_const(value)) {
+        uint32 i32 = jit_cc_get_const_I32(cc, value);
+        PUSH_I32(NEW_CONST(I32, ctz32(i32)));
+        return true;
+    }
+
+    res = jit_cc_new_reg_I32(cc);
+    GEN_INSN(CTZ, res, value);
+    PUSH_I32(res);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_popcnt(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_I32(value);
+    if (jit_reg_is_const(value)) {
+        uint32 i32 = jit_cc_get_const_I32(cc, value);
+        PUSH_I32(NEW_CONST(I32, popcnt32(i32)));
+        return true;
+    }
+
+    res = jit_cc_new_reg_I32(cc);
+    GEN_INSN(POPCNT, res, value);
+    PUSH_I32(res);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_clz(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_I64(value);
+    if (jit_reg_is_const(value)) {
+        uint64 i64 = jit_cc_get_const_I64(cc, value);
+        PUSH_I64(NEW_CONST(I64, clz64(i64)));
+        return true;
+    }
+
+    res = jit_cc_new_reg_I64(cc);
+    GEN_INSN(CLZ, res, value);
+    PUSH_I64(res);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_ctz(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_I64(value);
+    if (jit_reg_is_const(value)) {
+        uint64 i64 = jit_cc_get_const_I64(cc, value);
+        PUSH_I64(NEW_CONST(I64, ctz64(i64)));
+        return true;
+    }
+
+    res = jit_cc_new_reg_I64(cc);
+    GEN_INSN(CTZ, res, value);
+    PUSH_I64(res);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i64_popcnt(JitCompContext *cc)
+{
+    JitReg value, res;
+
+    POP_I64(value);
+    if (jit_reg_is_const(value)) {
+        uint64 i64 = jit_cc_get_const_I64(cc, value);
+        PUSH_I64(NEW_CONST(I64, popcnt64(i64)));
+        return true;
+    }
+
+    res = jit_cc_new_reg_I64(cc);
+    GEN_INSN(POPCNT, res, value);
+    PUSH_I64(res);
+    return true;
+fail:
+    return false;
+}
+
+#define IS_CONST_ALL_ONE(val, is_i32)                    \
+    (jit_reg_is_const(val)                               \
+     && ((is_i32 && jit_cc_get_const_I32(cc, val) == -1) \
+         || (!is_i32 && jit_cc_get_const_I64(cc, val) == -1LL)))
+
+#define IS_CONST_ZERO(val)                              \
+    (jit_reg_is_const(val)                              \
+     && ((is_i32 && jit_cc_get_const_I32(cc, val) == 0) \
+         || (!is_i32 && jit_cc_get_const_I64(cc, val) == 0)))
+
+/* macros for integer binary operations (ibinop) */
+
+#define __DEF_BI_INT_CONST_OPS(bits, opname, op)                               \
+    static int##bits do_i##bits##_const_##opname(int##bits lhs, int##bits rhs) \
+    {                                                                          \
+        return lhs op rhs;                                                     \
+    }
+
+#define DEF_BI_INT_CONST_OPS(opname, op)   \
+    __DEF_BI_INT_CONST_OPS(32, opname, op) \
+    __DEF_BI_INT_CONST_OPS(64, opname, op)
+
+#define DEF_UNI_INT_CONST_OPS(opname)            \
+    static JitReg compile_int_##opname##_consts( \
+        JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+
+typedef JitReg (*uni_const_handler)(JitCompContext *, JitReg, JitReg, bool);
+typedef int32 (*bin_i32_consts_handler)(int32, int32);
+typedef int64 (*bin_i64_consts_handler)(int64, int64);
+
+/* ibinopt for integer binary operations */
+static JitReg
+compile_op_ibinopt_const(JitCompContext *cc, JitReg left, JitReg right,
+                         bool is_i32, uni_const_handler handle_one_const,
+                         bin_i32_consts_handler handle_two_i32_const,
+                         bin_i64_consts_handler handle_two_i64_const)
+{
+    JitReg res;
+
+    if (jit_reg_is_const(left) && jit_reg_is_const(right)) {
+        if (is_i32) {
+            int32 left_val = jit_cc_get_const_I32(cc, left);
+            int32 right_val = jit_cc_get_const_I32(cc, right);
+            res = NEW_CONST(I32, handle_two_i32_const(left_val, right_val));
+        }
+        else {
+            int64 left_val = jit_cc_get_const_I64(cc, left);
+            int64 right_val = jit_cc_get_const_I64(cc, right);
+            res = NEW_CONST(I64, handle_two_i64_const(left_val, right_val));
+        }
+        goto shortcut;
+    }
+
+    if (jit_reg_is_const(left) || jit_reg_is_const(right)) {
+        res = handle_one_const(cc, left, right, is_i32);
+        if (res)
+            goto shortcut;
+    }
+
+    return 0;
+shortcut:
+    return res;
+}
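
The effect of compile_op_ibinopt_const is ordinary constant folding plus the
algebraic identities supplied by the per-op handlers. For instance, for the
Wasm sequence "i32.const 2; i32.const 3; i32.add", the add handlers defined
below fold everything at compile time:

    JitReg l = NEW_CONST(I32, 2), r = NEW_CONST(I32, 3);
    JitReg res = compile_op_ibinopt_const(cc, l, r, true,
                                          compile_int_add_consts,
                                          do_i32_const_add, do_i64_const_add);
    /* res is NEW_CONST(I32, 5); no ADD instruction is emitted.  With only
       one constant operand, x + 0 folds to x via IS_CONST_ZERO instead. */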
+
+#define CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, opname) \
+    compile_op_ibinopt_const(cc, left, right, is_i32,                 \
+                             compile_int_##opname##_consts,           \
+                             do_i32_const_##opname, do_i64_const_##opname)
+
+DEF_UNI_INT_CONST_OPS(add)
+{
+    /* If one of the operands is 0, just return the other */
+    if (IS_CONST_ZERO(left))
+        return right;
+    if (IS_CONST_ZERO(right))
+        return left;
+
+    return 0;
+}
+
+DEF_BI_INT_CONST_OPS(add, +)
+
+static JitReg
+compile_int_add(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, add);
+    if (res)
+        goto shortcut;
+
+    /* Build add */
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+    GEN_INSN(ADD, res, left, right);
+
+shortcut:
+    return res;
+}
+
+DEF_UNI_INT_CONST_OPS(sub)
+{
+    /* If the right operand is 0, just return the left */
+    if (IS_CONST_ZERO(right))
+        return left;
+
+    return 0;
+}
+
+DEF_BI_INT_CONST_OPS(sub, -)
+
+static JitReg
+compile_int_sub(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, sub);
+    if (res)
+        goto shortcut;
+
+    /* Build sub */
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+    GEN_INSN(SUB, res, left, right);
+
+shortcut:
+    return res;
+}
+
+DEF_UNI_INT_CONST_OPS(mul)
+{
+    /* If one of the operands is 0, just return constant 0 */
+    if (IS_CONST_ZERO(left) || IS_CONST_ZERO(right))
+        return is_i32 ? NEW_CONST(I32, 0) : NEW_CONST(I64, 0);
+
+    return 0;
+}
+
+static int32
+do_i32_const_mul(int32 lhs, int32 rhs)
+{
+    return (int32)((uint64)lhs * (uint64)rhs);
+}
+
+static int64
+do_i64_const_mul(int64 lhs, int64 rhs)
+{
+    return (int64)((uint64)lhs * (uint64)rhs);
+}
+
+static JitReg
+compile_int_mul(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, mul);
+    if (res)
+        goto shortcut;
+
+    /* Build mul */
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+    GEN_INSN(MUL, res, left, right);
+
+shortcut:
+    return res;
+}
+
+static bool
+compile_int_div_no_check(JitCompContext *cc, IntArithmetic arith_op,
+                         bool is_i32, JitReg left, JitReg right, JitReg res)
+{
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    JitReg eax_hreg = jit_codegen_get_hreg_by_name("eax");
+    JitReg edx_hreg = jit_codegen_get_hreg_by_name("edx");
+    JitReg rax_hreg = jit_codegen_get_hreg_by_name("rax");
+    JitReg rdx_hreg = jit_codegen_get_hreg_by_name("rdx");
+#endif
+
+    if (jit_reg_is_const(right) && jit_reg_is_const(left)) {
+        if (INT_DIV_S == arith_op || INT_REM_S == arith_op) {
+            if (is_i32) {
+                int32 lhs = jit_cc_get_const_I32(cc, left);
+                int32 rhs = jit_cc_get_const_I32(cc, right);
+                if (INT_DIV_S == arith_op) {
+                    res = NEW_CONST(I32, lhs / rhs);
+                }
+                else {
+                    res = NEW_CONST(I32, lhs % rhs);
+                }
+                PUSH_I32(res);
+                return true;
+            }
+            else {
+                int64 lhs = jit_cc_get_const_I64(cc, left);
+                int64 rhs = jit_cc_get_const_I64(cc, right);
+                if (INT_DIV_S == arith_op) {
+                    res = NEW_CONST(I64, lhs / rhs);
+                }
+                else {
+                    res = NEW_CONST(I64, lhs % rhs);
+                }
+                PUSH_I64(res);
+                return true;
+            }
+        }
+        else {
+            if (is_i32) {
+                uint32 lhs = (uint32)jit_cc_get_const_I32(cc, left);
+                uint32 rhs = (uint32)jit_cc_get_const_I32(cc, right);
+                if (INT_DIV_U == arith_op) {
+                    res = NEW_CONST(I32, lhs / rhs);
+                }
+                else {
+                    res = NEW_CONST(I32, lhs % rhs);
+                }
+                PUSH_I32(res);
+                return true;
+            }
+            else {
+                uint64 lhs = (uint64)jit_cc_get_const_I64(cc, left);
+                uint64 rhs = (uint64)jit_cc_get_const_I64(cc, right);
+                if (INT_DIV_U == arith_op) {
+                    res = NEW_CONST(I64, lhs / rhs);
+                }
+                else {
+                    res = NEW_CONST(I64, lhs % rhs);
+                }
+                PUSH_I64(res);
+                return true;
+            }
+        }
+    }
+
+    switch (arith_op) {
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+        case INT_DIV_S:
+        case INT_DIV_U:
+        {
+            JitInsn *insn = NULL, *insn1 = NULL;
+
+            if (is_i32) {
+                GEN_INSN(MOV, eax_hreg, left);
+                if (arith_op == INT_DIV_S)
+                    insn = GEN_INSN(DIV_S, eax_hreg, eax_hreg, right);
+                else
+                    insn = GEN_INSN(DIV_U, eax_hreg, eax_hreg, right);
+            }
+            else {
+                GEN_INSN(MOV, rax_hreg, left);
+                if (arith_op == INT_DIV_S)
+                    insn = GEN_INSN(DIV_S, rax_hreg, rax_hreg, right);
+                else
+                    insn = GEN_INSN(DIV_U, rax_hreg, rax_hreg, right);
+            }
+
+            jit_lock_reg_in_insn(cc, insn, eax_hreg);
+            jit_lock_reg_in_insn(cc, insn, edx_hreg);
+
+            if (is_i32) {
+                res = jit_cc_new_reg_I32(cc);
+                insn1 = jit_insn_new_MOV(res, eax_hreg);
+            }
+            else {
+                res = jit_cc_new_reg_I64(cc);
+                insn1 = jit_insn_new_MOV(res, rax_hreg);
+            }
+
+            if (insn && insn1) {
+                jit_insn_insert_after(insn, insn1);
+            }
+            break;
+        }
+        case INT_REM_S:
+        case INT_REM_U:
+        {
+            JitInsn *insn = NULL, *insn1 = NULL;
+
+            if (is_i32) {
+                GEN_INSN(MOV, eax_hreg, left);
+                if (arith_op == INT_REM_S)
+                    insn = GEN_INSN(REM_S, edx_hreg, eax_hreg, right);
+                else
+                    insn = GEN_INSN(REM_U, edx_hreg, eax_hreg, right);
+            }
+            else {
+                GEN_INSN(MOV, rax_hreg, left);
+                if (arith_op == INT_REM_S)
+                    insn = GEN_INSN(REM_S, rdx_hreg, rax_hreg, right);
+                else
+                    insn = GEN_INSN(REM_U, rdx_hreg, rax_hreg, right);
+            }
+
+            jit_lock_reg_in_insn(cc, insn, eax_hreg);
+            jit_lock_reg_in_insn(cc, insn, edx_hreg);
+
+            if (is_i32) {
+                res = jit_cc_new_reg_I32(cc);
+                insn1 = jit_insn_new_MOV(res, edx_hreg);
+            }
+            else {
+                res = jit_cc_new_reg_I64(cc);
+                insn1 = jit_insn_new_MOV(res, rdx_hreg);
+            }
+
+            if (insn && insn1) {
+                jit_insn_insert_after(insn, insn1);
+            }
+            break;
+        }
+#else
+        case INT_DIV_S:
+            GEN_INSN(DIV_S, res, left, right);
+            break;
+        case INT_DIV_U:
+            GEN_INSN(DIV_U, res, left, right);
+            break;
+        case INT_REM_S:
+            GEN_INSN(REM_S, res, left, right);
+            break;
+        case INT_REM_U:
+            GEN_INSN(REM_U, res, left, right);
+            break;
+#endif /* defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) */
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    if (is_i32)
+        PUSH_I32(res);
+    else
+        PUSH_I64(res);
+    return true;
+fail:
+    return false;
+}
+
+static bool
+compile_int_div(JitCompContext *cc, IntArithmetic arith_op, bool is_i32,
+                uint8 **p_frame_ip)
+{
+    JitReg left, right, res;
+
+    bh_assert(arith_op == INT_DIV_S || arith_op == INT_DIV_U
+              || arith_op == INT_REM_S || arith_op == INT_REM_U);
+
+    if (is_i32) {
+        POP_I32(right);
+        POP_I32(left);
+        res = jit_cc_new_reg_I32(cc);
+    }
+    else {
+        POP_I64(right);
+        POP_I64(left);
+        res = jit_cc_new_reg_I64(cc);
+    }
+
+    if (jit_reg_is_const(right)) {
+        int64 right_val = is_i32 ? (int64)jit_cc_get_const_I32(cc, right)
+                                 : jit_cc_get_const_I64(cc, right);
+
+        switch (right_val) {
+            case 0:
+            {
+                /* Directly throw exception if divided by zero */
+                if (!(jit_emit_exception(cc, JIT_EXCE_INTEGER_DIVIDE_BY_ZERO,
+                                         JIT_OP_JMP, 0, NULL)))
+                    goto fail;
+
+                return jit_handle_next_reachable_block(cc, p_frame_ip);
+            }
+            case 1:
+            {
+                if (arith_op == INT_DIV_S || arith_op == INT_DIV_U) {
+                    if (is_i32)
+                        PUSH_I32(left);
+                    else
+                        PUSH_I64(left);
+                }
+                else {
+                    if (is_i32)
+                        PUSH_I32(NEW_CONST(I32, 0));
+                    else
+                        PUSH_I64(NEW_CONST(I64, 0));
+                }
+                return true;
+            }
+            case -1:
+            {
+                if (arith_op == INT_DIV_S) {
+                    if (is_i32)
+                        GEN_INSN(CMP, cc->cmp_reg, left,
+                                 NEW_CONST(I32, INT32_MIN));
+                    else
+                        GEN_INSN(CMP, cc->cmp_reg, left,
+                                 NEW_CONST(I64, INT64_MIN));
+
+                    /* Throw integer overflow exception if left is
+                       INT32_MIN or INT64_MIN */
+                    if (!(jit_emit_exception(cc, JIT_EXCE_INTEGER_OVERFLOW,
+                                             JIT_OP_BEQ, cc->cmp_reg, NULL)))
+                        goto fail;
+
+                    /* Push -(left) to stack */
+                    GEN_INSN(NEG, res, left);
+                    if (is_i32)
+                        PUSH_I32(res);
+                    else
+                        PUSH_I64(res);
+                    return true;
+                }
+                else if (arith_op == INT_REM_S) {
+                    if (is_i32)
+                        PUSH_I32(NEW_CONST(I32, 0));
+                    else
+                        PUSH_I64(NEW_CONST(I64, 0));
+                    return true;
+                }
+                else {
+                    /* Build default div and rem */
+                    return compile_int_div_no_check(cc, arith_op, is_i32, left,
+                                                    right, res);
+                }
+            }
+            default:
+            {
+                /* Build default div and rem */
+                return compile_int_div_no_check(cc, arith_op, is_i32, left,
+                                                right, res);
+            }
+        }
+    }
+    else {
+        JitReg cmp1 = jit_cc_new_reg_I32(cc);
+        JitReg cmp2 = jit_cc_new_reg_I32(cc);
+
+        GEN_INSN(CMP, cc->cmp_reg, right,
+                 is_i32 ? NEW_CONST(I32, 0) : NEW_CONST(I64, 0));
+        /* Throw integer divided by zero exception if right is zero */
+        if (!(jit_emit_exception(cc, JIT_EXCE_INTEGER_DIVIDE_BY_ZERO,
+                                 JIT_OP_BEQ, cc->cmp_reg, NULL)))
+            goto fail;
+
+        switch (arith_op) {
+            case INT_DIV_S:
+            {
+                /* Check integer overflow */
+                GEN_INSN(CMP, cc->cmp_reg, left,
+                         is_i32 ? NEW_CONST(I32, INT32_MIN)
+                                : NEW_CONST(I64, INT64_MIN));
+                GEN_INSN(SELECTEQ, cmp1, cc->cmp_reg, NEW_CONST(I32, 1),
+                         NEW_CONST(I32, 0));
+                GEN_INSN(CMP, cc->cmp_reg, right,
+                         is_i32 ? NEW_CONST(I32, -1) : NEW_CONST(I64, -1LL));
+                GEN_INSN(SELECTEQ, cmp2, cc->cmp_reg, NEW_CONST(I32, 1),
+                         NEW_CONST(I32, 0));
+                GEN_INSN(AND, cmp1, cmp1, cmp2);
+                GEN_INSN(CMP, cc->cmp_reg, cmp1, NEW_CONST(I32, 1));
+                /* Throw integer overflow exception if left is INT32_MIN or
+                   INT64_MIN, and right is -1 */
+                if (!(jit_emit_exception(cc, JIT_EXCE_INTEGER_OVERFLOW,
+                                         JIT_OP_BEQ, cc->cmp_reg, NULL)))
+                    goto fail;
+
+                /* Build default div and rem */
+                return compile_int_div_no_check(cc, arith_op, is_i32, left,
+                                                right, res);
+            }
+            case INT_REM_S:
+            {
+                GEN_INSN(CMP, cc->cmp_reg, right,
+                         is_i32 ? NEW_CONST(I32, -1) : NEW_CONST(I64, -1LL));
+                if (is_i32)
+                    GEN_INSN(SELECTEQ, left, cc->cmp_reg, NEW_CONST(I32, 0),
+                             left);
+                else
+                    GEN_INSN(SELECTEQ, left, cc->cmp_reg, NEW_CONST(I64, 0),
+                             left);
+                /* Build default div and rem */
+                return compile_int_div_no_check(cc, arith_op, is_i32, left,
+                                                right, res);
+            }
+            default:
+            {
+                /* Build default div and rem */
+                return compile_int_div_no_check(cc, arith_op, is_i32, left,
+                                                right, res);
+            }
+        }
+    }
+
+fail:
+    return false;
+}
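
For the non-constant INT_DIV_S path, the emitted guards materialize the two
Wasm trap conditions before the division; for INT_REM_S the SELECTEQ instead
rewrites left to 0 when right == -1, since INT_MIN % -1 must yield 0 rather
than trap. In plain C (trap() again stands in for the exception branch):

    /* 32-bit signed division guards, as emitted above */
    if (right == 0)
        trap("integer divide by zero");      /* CMP + BEQ               */
    if (left == INT32_MIN && right == -1)
        trap("integer overflow");            /* SELECTEQ, SELECTEQ, AND */
    res = left / right;                      /* then the real idiv      */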
+
+static bool
+compile_op_int_arithmetic(JitCompContext *cc, IntArithmetic arith_op,
+                          bool is_i32, uint8 **p_frame_ip)
+{
+    switch (arith_op) {
+        case INT_ADD:
+            DEF_INT_BINARY_OP(compile_int_add(cc, left, right, is_i32),
+                              "compile int add fail.");
+            return true;
+        case INT_SUB:
+            DEF_INT_BINARY_OP(compile_int_sub(cc, left, right, is_i32),
+                              "compile int sub fail.");
+            return true;
+        case INT_MUL:
+            DEF_INT_BINARY_OP(compile_int_mul(cc, left, right, is_i32),
+                              "compile int mul fail.");
+            return true;
+        case INT_DIV_S:
+        case INT_DIV_U:
+        case INT_REM_S:
+        case INT_REM_U:
+            return compile_int_div(cc, arith_op, is_i32, p_frame_ip);
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_arithmetic(JitCompContext *cc, IntArithmetic arith_op,
+                              uint8 **p_frame_ip)
+{
+    return compile_op_int_arithmetic(cc, arith_op, true, p_frame_ip);
+}
+
+bool
+jit_compile_op_i64_arithmetic(JitCompContext *cc, IntArithmetic arith_op,
+                              uint8 **p_frame_ip)
+{
+    return compile_op_int_arithmetic(cc, arith_op, false, p_frame_ip);
+}
+
+DEF_UNI_INT_CONST_OPS(and)
+{
+    JitReg res;
+    if (IS_CONST_ZERO(left) || IS_CONST_ZERO(right)) {
+        res = is_i32 ? NEW_CONST(I32, 0) : NEW_CONST(I64, 0);
+        goto shortcut;
+    }
+
+    if (IS_CONST_ALL_ONE(left, is_i32)) {
+        res = right;
+        goto shortcut;
+    }
+
+    if (IS_CONST_ALL_ONE(right, is_i32)) {
+        res = left;
+        goto shortcut;
+    }
+
+    return 0;
+shortcut:
+    return res;
+}
+
+DEF_BI_INT_CONST_OPS(and, &)
+
+static JitReg
+compile_int_and(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+
+    /* shortcuts */
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, and);
+    if (res)
+        goto shortcut;
+
+    /* do and */
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+    GEN_INSN(AND, res, left, right);
+
+shortcut:
+    return res;
+}
+
+DEF_UNI_INT_CONST_OPS(or)
+{
+    JitReg res;
+
+    if (IS_CONST_ZERO(left)) {
+        res = right;
+        goto shortcut;
+    }
+
+    if (IS_CONST_ZERO(right)) {
+        res = left;
+        goto shortcut;
+    }
+
+    if (IS_CONST_ALL_ONE(left, is_i32) || IS_CONST_ALL_ONE(right, is_i32)) {
+        res = is_i32 ? NEW_CONST(I32, -1) : NEW_CONST(I64, -1LL);
+        goto shortcut;
+    }
+
+    return 0;
+shortcut:
+    return res;
+}
+
+DEF_BI_INT_CONST_OPS(or, |)
+
+static JitReg
+compile_int_or(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+
+    /* shortcuts */
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, or);
+    if (res)
+        goto shortcut;
+
+    /* do or */
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+    GEN_INSN(OR, res, left, right);
+
+shortcut:
+    return res;
+}
+
+DEF_UNI_INT_CONST_OPS(xor)
+{
+    if (IS_CONST_ZERO(left))
+        return right;
+
+    if (IS_CONST_ZERO(right))
+        return left;
+
+    return 0;
+}
+
+DEF_BI_INT_CONST_OPS(xor, ^)
+
+static JitReg
+compile_int_xor(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+
+    /* shortcuts */
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, xor);
+    if (res)
+        goto shortcut;
+
+    /* do xor */
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+    GEN_INSN(XOR, res, left, right);
+
+shortcut:
+    return res;
+}
+
+static bool
+compile_op_int_bitwise(JitCompContext *cc, IntBitwise arith_op, bool is_i32)
+{
+    JitReg left, right, res;
+
+    POP_INT(right);
+    POP_INT(left);
+
+    switch (arith_op) {
+        case INT_AND:
+        {
+            res = compile_int_and(cc, left, right, is_i32);
+            break;
+        }
+        case INT_OR:
+        {
+            res = compile_int_or(cc, left, right, is_i32);
+            break;
+        }
+        case INT_XOR:
+        {
+            res = compile_int_xor(cc, left, right, is_i32);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    PUSH_INT(res);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_bitwise(JitCompContext *cc, IntBitwise bitwise_op)
+{
+    return compile_op_int_bitwise(cc, bitwise_op, true);
+}
+
+bool
+jit_compile_op_i64_bitwise(JitCompContext *cc, IntBitwise bitwise_op)
+{
+    return compile_op_int_bitwise(cc, bitwise_op, false);
+}
+
+DEF_UNI_INT_CONST_OPS(shl)
+{
+    if (IS_CONST_ZERO(right) || IS_CONST_ZERO(left)) {
+        return left;
+    }
+
+    if (jit_reg_is_const(right)) {
+        JitReg res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+        GEN_INSN(SHL, res, left, right);
+        return res;
+    }
+    return 0;
+}
+
+DEF_UNI_INT_CONST_OPS(shrs)
+{
+    if (IS_CONST_ZERO(right) || IS_CONST_ZERO(left)
+        || IS_CONST_ALL_ONE(left, is_i32)) {
+        return left;
+    }
+
+    if (jit_reg_is_const(right)) {
+        JitReg res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+        GEN_INSN(SHRS, res, left, right);
+        return res;
+    }
+    return 0;
+}
+
+DEF_UNI_INT_CONST_OPS(shru)
+{
+    if (IS_CONST_ZERO(right) || IS_CONST_ZERO(left)) {
+        return left;
+    }
+
+    if (jit_reg_is_const(right)) {
+        JitReg res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+        GEN_INSN(SHRU, res, left, right);
+        return res;
+    }
+    return 0;
+}
+
+static int32
+do_i32_const_shl(int32 lhs, int32 rhs)
+{
+    return (int32)((uint32)lhs << (uint32)rhs);
+}
+
+static int64
+do_i64_const_shl(int64 lhs, int64 rhs)
+{
+    return (int64)((uint64)lhs << (uint64)rhs);
+}
+
+DEF_BI_INT_CONST_OPS(shrs, >>)
+
+static int32
+do_i32_const_shru(int32 lhs, int32 rhs)
+{
+    return (uint32)lhs >> rhs;
+}
+
+static int64
+do_i64_const_shru(int64 lhs, int64 rhs)
+{
+    return (uint64)lhs >> rhs;
+}
+
+typedef enum { SHL, SHRS, SHRU, ROTL, ROTR } SHIFT_OP;
+
+static JitReg
+compile_int_shift_modulo(JitCompContext *cc, JitReg rhs, bool is_i32,
+                         SHIFT_OP op)
+{
+    JitReg res;
+
+    if (jit_reg_is_const(rhs)) {
+        if (is_i32) {
+            int32 val = jit_cc_get_const_I32(cc, rhs);
+            val = val & 0x1f;
+            res = NEW_CONST(I32, val);
+        }
+        else {
+            int64 val = jit_cc_get_const_I64(cc, rhs);
+            val = val & 0x3f;
+            res = NEW_CONST(I64, val);
+        }
+    }
+    else {
+        if (op == ROTL || op == ROTR) {
+            /* No need to generate an AND insn: rotating by d and by
+               d modulo the operand width yield the same result */
+            res = rhs;
+        }
+        else if (is_i32) {
+            res = jit_cc_new_reg_I32(cc);
+            GEN_INSN(AND, res, rhs, NEW_CONST(I32, 0x1f));
+        }
+        else {
+            res = jit_cc_new_reg_I64(cc);
+            GEN_INSN(AND, res, rhs, NEW_CONST(I64, 0x3f));
+        }
+    }
+
+    return res;
+}
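
This implements the Wasm rule that shift distances are taken modulo the
operand width, so shifting an i32 by 33 behaves like shifting by 1:

    assert(((uint32)1 << (33 & 0x1f)) == 2);  /* i32.shl, count 33 -> 1 */

For rotates the AND can be skipped because rotating by d and by d modulo the
width are the same operation (x86 rotate instructions also mask the count).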
+
+static JitReg
+mov_left_to_reg(JitCompContext *cc, bool is_i32, JitReg left)
+{
+    JitReg res = left;
+    /* left needs to be a variable */
+    if (jit_reg_is_const(left)) {
+        res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+        GEN_INSN(MOV, res, left);
+    }
+    return res;
+}
+
+static JitReg
+compile_int_shl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
+    JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
+#endif
+
+    right = compile_int_shift_modulo(cc, right, is_i32, SHL);
+
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, shl);
+    if (res)
+        goto shortcut;
+
+    left = mov_left_to_reg(cc, is_i32, left);
+
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
+    insn = GEN_INSN(SHL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
+#else
+    GEN_INSN(SHL, res, left, right);
+#endif
+
+shortcut:
+    return res;
+}
+
+static JitReg
+compile_int_shrs(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
+    JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
+#endif
+
+    right = compile_int_shift_modulo(cc, right, is_i32, SHRS);
+
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, shrs);
+    if (res)
+        goto shortcut;
+
+    left = mov_left_to_reg(cc, is_i32, left);
+
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
+    insn = GEN_INSN(SHRS, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
+#else
+    GEN_INSN(SHRS, res, left, right);
+#endif
+
+shortcut:
+    return res;
+}
+
+static JitReg
+compile_int_shru(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
+    JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
+#endif
+
+    right = compile_int_shift_modulo(cc, right, is_i32, SHRU);
+
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, shru);
+    if (res)
+        goto shortcut;
+
+    left = mov_left_to_reg(cc, is_i32, left);
+
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
+    insn = GEN_INSN(SHRU, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
+#else
+    GEN_INSN(SHRU, res, left, right);
+#endif
+
+shortcut:
+    return res;
+}
+
+DEF_UNI_INT_CONST_OPS(rotl)
+{
+    if (IS_CONST_ZERO(right) || IS_CONST_ZERO(left)
+        || IS_CONST_ALL_ONE(left, is_i32))
+        return left;
+
+    if (jit_reg_is_const(right)) {
+        JitReg res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+        GEN_INSN(ROTL, res, left, right);
+        return res;
+    }
+
+    return 0;
+}
+
+static int32
+do_i32_const_rotl(int32 lhs, int32 rhs)
+{
+    uint32 n = (uint32)lhs;
+    uint32 d = (uint32)rhs & 31;
+    /* mask the right-shift count so that d == 0 cannot trigger an
+       undefined shift by 32 */
+    return (int32)((n << d) | (n >> ((32 - d) & 31)));
+}
+
+static int64
+do_i64_const_rotl(int64 lhs, int64 rhs)
+{
+    uint64 n = (uint64)lhs;
+    uint64 d = (uint64)rhs & 63;
+    return (int64)((n << d) | (n >> ((64 - d) & 63)));
+}
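+
+/* Worked example of the constant folding above (illustrative): for i32,
+   rotl(0x80000001, 1) yields (n << 1) | (n >> 31) == 0x00000003. */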
+
+static JitReg
+compile_int_rotl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
+    JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
+#endif
+
+    right = compile_int_shift_modulo(cc, right, is_i32, ROTL);
+
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, rotl);
+    if (res)
+        goto shortcut;
+
+    left = mov_left_to_reg(cc, is_i32, left);
+
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
+    insn = GEN_INSN(ROTL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
+#else
+    GEN_INSN(ROTL, res, left, right);
+#endif
+
+shortcut:
+    return res;
+}
+
+DEF_UNI_INT_CONST_OPS(rotr)
+{
+    if (IS_CONST_ZERO(right) || IS_CONST_ZERO(left)
+        || IS_CONST_ALL_ONE(left, is_i32))
+        return left;
+
+    if (jit_reg_is_const(right)) {
+        JitReg res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+        GEN_INSN(ROTR, res, left, right);
+        return res;
+    }
+
+    return 0;
+}
+
+static int32
+do_i32_const_rotr(int32 lhs, int32 rhs)
+{
+    uint32 n = (uint32)lhs;
+    uint32 d = (uint32)rhs & 31;
+    /* mask the left-shift count so that d == 0 cannot trigger an
+       undefined shift by 32 */
+    return (int32)((n >> d) | (n << ((32 - d) & 31)));
+}
+
+static int64
+do_i64_const_rotr(int64 lhs, int64 rhs)
+{
+    uint64 n = (uint64)lhs;
+    uint64 d = (uint64)rhs & 63;
+    return (int64)((n >> d) | (n << ((64 - d) & 63)));
+}
+
+static JitReg
+compile_int_rotr(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
+{
+    JitReg res;
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
+    JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
+    JitInsn *insn = NULL;
+#endif
+
+    right = compile_int_shift_modulo(cc, right, is_i32, ROTR);
+
+    res = CHECK_AND_PROCESS_INT_CONSTS(cc, left, right, is_i32, rotr);
+    if (res)
+        goto shortcut;
+
+    left = mov_left_to_reg(cc, is_i32, left);
+
+    res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
+    insn = GEN_INSN(ROTR, res, left, is_i32 ? ecx_hreg : rcx_hreg);
+    jit_lock_reg_in_insn(cc, insn, ecx_hreg);
+#else
+    GEN_INSN(ROTR, res, left, right);
+#endif
+
+shortcut:
+    return res;
+}
+
+static bool
+compile_op_int_shift(JitCompContext *cc, IntShift shift_op, bool is_i32)
+{
+    JitReg left, right, res;
+
+    POP_INT(right);
+    POP_INT(left);
+
+    switch (shift_op) {
+        case INT_SHL:
+        {
+            res = compile_int_shl(cc, left, right, is_i32);
+            break;
+        }
+        case INT_SHR_S:
+        {
+            res = compile_int_shrs(cc, left, right, is_i32);
+            break;
+        }
+        case INT_SHR_U:
+        {
+            res = compile_int_shru(cc, left, right, is_i32);
+            break;
+        }
+        case INT_ROTL:
+        {
+            res = compile_int_rotl(cc, left, right, is_i32);
+            break;
+        }
+        case INT_ROTR:
+        {
+            res = compile_int_rotr(cc, left, right, is_i32);
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    PUSH_INT(res);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_i32_shift(JitCompContext *cc, IntShift shift_op)
+{
+    return compile_op_int_shift(cc, shift_op, true);
+}
+
+bool
+jit_compile_op_i64_shift(JitCompContext *cc, IntShift shift_op)
+{
+    return compile_op_int_shift(cc, shift_op, false);
+}
+
+static float32
+negf(float32 f32)
+{
+    return -f32;
+}
+
+static float64
+neg(float64 f64)
+{
+    return -f64;
+}
+
+static bool
+compile_op_float_math(JitCompContext *cc, FloatMath math_op, bool is_f32)
+{
+    JitReg value, res;
+    void *func = NULL;
+
+    if (is_f32)
+        res = jit_cc_new_reg_F32(cc);
+    else
+        res = jit_cc_new_reg_F64(cc);
+
+    if (is_f32)
+        POP_F32(value);
+    else
+        POP_F64(value);
+
+    switch (math_op) {
+        case FLOAT_ABS:
+            /* TODO: andps 0x7fffffffffffffff */
+            func = is_f32 ? (void *)fabsf : (void *)fabs;
+            break;
+        case FLOAT_NEG:
+            /* TODO: xorps 0x8000000000000000 */
+            func = is_f32 ? (void *)negf : (void *)neg;
+            break;
+        case FLOAT_CEIL:
+            func = is_f32 ? (void *)ceilf : (void *)ceil;
+            break;
+        case FLOAT_FLOOR:
+            func = is_f32 ? (void *)floorf : (void *)floor;
+            break;
+        case FLOAT_TRUNC:
+            func = is_f32 ? (void *)truncf : (void *)trunc;
+            break;
+        case FLOAT_NEAREST:
+            func = is_f32 ? (void *)rintf : (void *)rint;
+            break;
+        case FLOAT_SQRT:
+            func = is_f32 ? (void *)sqrtf : (void *)sqrt;
+            break;
+        default:
+            bh_assert(0);
+            goto fail;
+    }
+
+    if (!jit_emit_callnative(cc, func, res, &value, 1)) {
+        goto fail;
+    }
+
+    if (is_f32)
+        PUSH_F32(res);
+    else
+        PUSH_F64(res);
+
+    return true;
+fail:
+    return false;
+}
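+
+/* The andps/xorps TODOs above refer to implementing abs/neg by sign-bit
+   twiddling instead of a native call; a minimal f32 sketch (illustrative,
+   assuming an IEEE-754 float layout):
+       uint32 bits;
+       memcpy(&bits, &f, sizeof(bits));
+       bits &= 0x7fffffffu;   -- fabsf: clear the sign bit
+       bits ^= 0x80000000u;   -- negf: flip the sign bit
+       memcpy(&f, &bits, sizeof(f));
+*/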
+
+bool
+jit_compile_op_f32_math(JitCompContext *cc, FloatMath math_op)
+{
+    return compile_op_float_math(cc, math_op, true);
+}
+
+bool
+jit_compile_op_f64_math(JitCompContext *cc, FloatMath math_op)
+{
+    return compile_op_float_math(cc, math_op, false);
+}
+
+static float32
+local_minf(float32 f1, float32 f2)
+{
+    if (isnan(f1))
+        return f1;
+    if (isnan(f2))
+        return f2;
+
+    return fminf(f1, f2);
+}
+
+static float64
+local_min(float64 f1, float64 f2)
+{
+    if (isnan(f1))
+        return f1;
+    if (isnan(f2))
+        return f2;
+
+    return fmin(f1, f2);
+}
+
+static float32
+local_maxf(float32 f1, float32 f2)
+{
+    if (isnan(f1))
+        return f1;
+    if (isnan(f2))
+        return f2;
+
+    return fmaxf(f1, f2);
+}
+
+static float64
+local_max(float64 f1, float64 f2)
+{
+    if (isnan(f1))
+        return f1;
+    if (isnan(f2))
+        return f2;
+
+    return fmax(f1, f2);
+}
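+
+/* These wrappers exist because WASM fXX.min/fXX.max must propagate NaN,
+   while C99 fmin/fmax prefer the non-NaN operand, e.g. (illustrative):
+       fminf(NAN, 1.0f) == 1.0f,  but  wasm f32.min(NaN, 1.0) == NaN
+   hence the explicit isnan() checks before delegating. */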
+
+static bool
+compile_op_float_min_max(JitCompContext *cc, FloatArithmetic arith_op,
+                         bool is_f32, JitReg lhs, JitReg rhs, JitReg *out)
+{
+    JitReg res, args[2];
+    void *func;
+
+    res = is_f32 ? jit_cc_new_reg_F32(cc) : jit_cc_new_reg_F64(cc);
+    if (arith_op == FLOAT_MIN)
+        func = is_f32 ? (void *)local_minf : (void *)local_min;
+    else
+        func = is_f32 ? (void *)local_maxf : (void *)local_max;
+
+    args[0] = lhs;
+    args[1] = rhs;
+    if (!jit_emit_callnative(cc, func, res, args, 2))
+        return false;
+
+    *out = res;
+    return true;
+}
+
+static bool
+compile_op_float_arithmetic(JitCompContext *cc, FloatArithmetic arith_op,
+                            bool is_f32)
+{
+    JitReg lhs, rhs, res;
+
+    if (is_f32) {
+        POP_F32(rhs);
+        POP_F32(lhs);
+        res = jit_cc_new_reg_F32(cc);
+    }
+    else {
+        POP_F64(rhs);
+        POP_F64(lhs);
+        res = jit_cc_new_reg_F64(cc);
+    }
+
+    switch (arith_op) {
+        case FLOAT_ADD:
+        {
+            GEN_INSN(ADD, res, lhs, rhs);
+            break;
+        }
+        case FLOAT_SUB:
+        {
+            GEN_INSN(SUB, res, lhs, rhs);
+            break;
+        }
+        case FLOAT_MUL:
+        {
+            GEN_INSN(MUL, res, lhs, rhs);
+            break;
+        }
+        case FLOAT_DIV:
+        {
+            GEN_INSN(DIV_S, res, lhs, rhs);
+            break;
+        }
+        case FLOAT_MIN:
+        case FLOAT_MAX:
+        {
+            if (!compile_op_float_min_max(cc, arith_op, is_f32, lhs, rhs, &res))
+                goto fail;
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            goto fail;
+        }
+    }
+
+    if (is_f32)
+        PUSH_F32(res);
+    else
+        PUSH_F64(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f32_arithmetic(JitCompContext *cc, FloatArithmetic arith_op)
+{
+    return compile_op_float_arithmetic(cc, arith_op, true);
+}
+
+bool
+jit_compile_op_f64_arithmetic(JitCompContext *cc, FloatArithmetic arith_op)
+{
+    return compile_op_float_arithmetic(cc, arith_op, false);
+}
+
+bool
+jit_compile_op_f32_copysign(JitCompContext *cc)
+{
+    JitReg res;
+    JitReg args[2] = { 0 };
+
+    POP_F32(args[1]);
+    POP_F32(args[0]);
+
+    res = jit_cc_new_reg_F32(cc);
+    if (!jit_emit_callnative(cc, copysignf, res, args, 2))
+        goto fail;
+
+    PUSH_F32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_f64_copysign(JitCompContext *cc)
+{
+    JitReg res;
+    JitReg args[2] = { 0 };
+
+    POP_F64(args[1]);
+    POP_F64(args[0]);
+
+    res = jit_cc_new_reg_F64(cc);
+    if (!jit_emit_callnative(cc, copysign, res, args, 2))
+        goto fail;
+
+    PUSH_F64(res);
+
+    return true;
+fail:
+    return false;
+}
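+
+/* copysign returns the magnitude of its first operand with the sign of
+   the second, matching wasm fXX.copysign, e.g.:
+       copysign(3.0, -0.0) == -3.0,  copysignf(-2.5f, 1.0f) == 2.5f */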

+ 76 - 0
core/iwasm/fast-jit/fe/jit_emit_numberic.h

@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_NUMBERIC_H_
+#define _JIT_EMIT_NUMBERIC_H_
+
+#include "../jit_compiler.h"
+#include "../jit_frontend.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_compile_op_i32_clz(JitCompContext *cc);
+
+bool
+jit_compile_op_i32_ctz(JitCompContext *cc);
+
+bool
+jit_compile_op_i32_popcnt(JitCompContext *cc);
+
+bool
+jit_compile_op_i64_clz(JitCompContext *cc);
+
+bool
+jit_compile_op_i64_ctz(JitCompContext *cc);
+
+bool
+jit_compile_op_i64_popcnt(JitCompContext *cc);
+
+bool
+jit_compile_op_i32_arithmetic(JitCompContext *cc, IntArithmetic arith_op,
+                              uint8 **p_frame_ip);
+
+bool
+jit_compile_op_i64_arithmetic(JitCompContext *cc, IntArithmetic arith_op,
+                              uint8 **p_frame_ip);
+
+bool
+jit_compile_op_i32_bitwise(JitCompContext *cc, IntBitwise bitwise_op);
+
+bool
+jit_compile_op_i64_bitwise(JitCompContext *cc, IntBitwise bitwise_op);
+
+bool
+jit_compile_op_i32_shift(JitCompContext *cc, IntShift shift_op);
+
+bool
+jit_compile_op_i64_shift(JitCompContext *cc, IntShift shift_op);
+
+bool
+jit_compile_op_f32_math(JitCompContext *cc, FloatMath math_op);
+
+bool
+jit_compile_op_f64_math(JitCompContext *cc, FloatMath math_op);
+
+bool
+jit_compile_op_f32_arithmetic(JitCompContext *cc, FloatArithmetic arith_op);
+
+bool
+jit_compile_op_f64_arithmetic(JitCompContext *cc, FloatArithmetic arith_op);
+
+bool
+jit_compile_op_f32_copysign(JitCompContext *cc);
+
+bool
+jit_compile_op_f64_copysign(JitCompContext *cc);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_NUMBERIC_H_ */

+ 130 - 0
core/iwasm/fast-jit/fe/jit_emit_parametric.c

@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_parametric.h"
+#include "../jit_frontend.h"
+
+static bool
+pop_value_from_wasm_stack(JitCompContext *cc, bool is_32bit, JitReg *p_value,
+                          uint8 *p_type)
+{
+    JitValue *jit_value;
+    JitReg value;
+    uint8 type;
+
+    if (!jit_block_stack_top(&cc->block_stack)) {
+        jit_set_last_error(cc, "WASM block stack underflow.");
+        return false;
+    }
+    if (!jit_block_stack_top(&cc->block_stack)->value_stack.value_list_end) {
+        jit_set_last_error(cc, "WASM data stack underflow.");
+        return false;
+    }
+
+    jit_value = jit_value_stack_pop(
+        &jit_block_stack_top(&cc->block_stack)->value_stack);
+    type = jit_value->type;
+
+    if (p_type != NULL) {
+        *p_type = jit_value->type;
+    }
+
+    wasm_runtime_free(jit_value);
+
+    /* is_32: i32, f32, ref.func, ref.extern, v128 */
+    if (is_32bit
+        && !(type == VALUE_TYPE_I32 || type == VALUE_TYPE_F32
+#if WASM_ENABLE_REF_TYPES != 0
+             || type == VALUE_TYPE_FUNCREF || type == VALUE_TYPE_EXTERNREF
+#endif
+             || type == VALUE_TYPE_V128)) {
+        jit_set_last_error(cc, "invalid WASM stack data type.");
+        return false;
+    }
+    /* !is_32: i64, f64 */
+    if (!is_32bit && !(type == VALUE_TYPE_I64 || type == VALUE_TYPE_F64)) {
+        jit_set_last_error(cc, "invalid WASM stack data type.");
+        return false;
+    }
+
+    switch (type) {
+        case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+        case VALUE_TYPE_FUNCREF:
+        case VALUE_TYPE_EXTERNREF:
+#endif
+            value = pop_i32(cc->jit_frame);
+            break;
+        case VALUE_TYPE_I64:
+            value = pop_i64(cc->jit_frame);
+            break;
+        case VALUE_TYPE_F32:
+            value = pop_f32(cc->jit_frame);
+            break;
+        case VALUE_TYPE_F64:
+            value = pop_f64(cc->jit_frame);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    if (p_value != NULL) {
+        *p_value = value;
+    }
+    return true;
+}
+
+bool
+jit_compile_op_drop(JitCompContext *cc, bool is_drop_32)
+{
+    if (!pop_value_from_wasm_stack(cc, is_drop_32, NULL, NULL))
+        return false;
+    return true;
+}
+
+bool
+jit_compile_op_select(JitCompContext *cc, bool is_select_32)
+{
+    JitReg val1, val2, cond, selected;
+    uint8 val1_type, val2_type;
+
+    POP_I32(cond);
+
+    if (!pop_value_from_wasm_stack(cc, is_select_32, &val2, &val2_type)
+        || !pop_value_from_wasm_stack(cc, is_select_32, &val1, &val1_type)) {
+        return false;
+    }
+
+    if (val1_type != val2_type) {
+        jit_set_last_error(cc, "invalid stack values with different types");
+        return false;
+    }
+
+    switch (val1_type) {
+        case VALUE_TYPE_I32:
+            selected = jit_cc_new_reg_I32(cc);
+            break;
+        case VALUE_TYPE_I64:
+            selected = jit_cc_new_reg_I64(cc);
+            break;
+        case VALUE_TYPE_F32:
+            selected = jit_cc_new_reg_F32(cc);
+            break;
+        case VALUE_TYPE_F64:
+            selected = jit_cc_new_reg_F64(cc);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    GEN_INSN(CMP, cc->cmp_reg, cond, NEW_CONST(I32, 0));
+    GEN_INSN(SELECTNE, selected, cc->cmp_reg, val1, val2);
+    PUSH(selected, val1_type);
+    return true;
+fail:
+    return false;
+}
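+
+/* select lowers to a compare plus conditional move rather than a branch;
+   the IR emitted above is roughly (illustrative):
+       CMP      cmp_reg, cond, 0
+       SELECTNE selected, cmp_reg, val1, val2
+   i.e. selected = (cond != 0) ? val1 : val2, as wasm select requires. */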

+ 25 - 0
core/iwasm/fast-jit/fe/jit_emit_parametric.h

@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_PARAMETRIC_H_
+#define _JIT_EMIT_PARAMETRIC_H_
+
+#include "../jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_compile_op_drop(JitCompContext *cc, bool is_drop_32);
+
+bool
+jit_compile_op_select(JitCompContext *cc, bool is_select_32);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_PARAMETRIC_H_ */

+ 318 - 0
core/iwasm/fast-jit/fe/jit_emit_table.c

@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_table.h"
+#include "jit_emit_exception.h"
+#include "jit_emit_function.h"
+#include "../../interpreter/wasm_runtime.h"
+#include "../jit_frontend.h"
+
+#if WASM_ENABLE_REF_TYPES != 0
+bool
+jit_compile_op_elem_drop(JitCompContext *cc, uint32 tbl_seg_idx)
+{
+    JitReg module, tbl_segs;
+
+    module = get_module_reg(cc->jit_frame);
+
+    tbl_segs = jit_cc_new_reg_ptr(cc);
+    GEN_INSN(LDPTR, tbl_segs, module,
+             NEW_CONST(I32, offsetof(WASMModule, table_segments)));
+
+    GEN_INSN(STI32, NEW_CONST(I32, true), tbl_segs,
+             NEW_CONST(I32, tbl_seg_idx * sizeof(WASMTableSeg)
+                                + offsetof(WASMTableSeg, is_dropped)));
+    return true;
+}
+
+bool
+jit_compile_op_table_get(JitCompContext *cc, uint32 tbl_idx)
+{
+    JitReg elem_idx, tbl_sz, tbl_data, elem_idx_long, offset, res;
+
+    POP_I32(elem_idx);
+
+    /* if (elem_idx >= tbl_sz) goto exception; */
+    tbl_sz = get_table_cur_size_reg(cc->jit_frame, tbl_idx);
+    GEN_INSN(CMP, cc->cmp_reg, elem_idx, tbl_sz);
+    if (!jit_emit_exception(cc, JIT_EXCE_OUT_OF_BOUNDS_TABLE_ACCESS,
+                            JIT_OP_BGEU, cc->cmp_reg, NULL))
+        goto fail;
+
+    elem_idx_long = jit_cc_new_reg_I64(cc);
+    GEN_INSN(I32TOI64, elem_idx_long, elem_idx);
+
+    offset = jit_cc_new_reg_I64(cc);
+    GEN_INSN(MUL, offset, elem_idx_long, NEW_CONST(I64, sizeof(uint32)));
+
+    res = jit_cc_new_reg_I32(cc);
+    tbl_data = get_table_data_reg(cc->jit_frame, tbl_idx);
+    GEN_INSN(LDI32, res, tbl_data, offset);
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_table_set(JitCompContext *cc, uint32 tbl_idx)
+{
+    JitReg elem_idx, elem_val, tbl_sz, tbl_data, elem_idx_long, offset;
+
+    POP_I32(elem_val);
+    POP_I32(elem_idx);
+
+    /* if (elem_idx >= tbl_sz) goto exception; */
+    tbl_sz = get_table_cur_size_reg(cc->jit_frame, tbl_idx);
+    GEN_INSN(CMP, cc->cmp_reg, elem_idx, tbl_sz);
+    if (!jit_emit_exception(cc, JIT_EXCE_OUT_OF_BOUNDS_TABLE_ACCESS,
+                            JIT_OP_BGEU, cc->cmp_reg, NULL))
+        goto fail;
+
+    elem_idx_long = jit_cc_new_reg_I64(cc);
+    GEN_INSN(I32TOI64, elem_idx_long, elem_idx);
+
+    offset = jit_cc_new_reg_I64(cc);
+    GEN_INSN(MUL, offset, elem_idx_long, NEW_CONST(I64, sizeof(uint32)));
+
+    tbl_data = get_table_data_reg(cc->jit_frame, tbl_idx);
+    GEN_INSN(STI32, elem_val, tbl_data, offset);
+
+    return true;
+fail:
+    return false;
+}
+
+static int
+wasm_init_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 elem_idx,
+                uint32 dst, uint32 len, uint32 src)
+{
+    WASMTableInstance *tbl;
+    uint32 tbl_sz;
+    WASMTableSeg *elem;
+    uint32 elem_len;
+
+    tbl = inst->tables[tbl_idx];
+    tbl_sz = tbl->cur_size;
+    if (dst > tbl_sz || tbl_sz - dst < len)
+        goto out_of_bounds;
+
+    elem = inst->module->table_segments + elem_idx;
+    elem_len = elem->function_count;
+    if (src > elem_len || elem_len - src < len)
+        goto out_of_bounds;
+
+    bh_memcpy_s((uint8 *)(tbl) + offsetof(WASMTableInstance, base_addr)
+                    + dst * sizeof(uint32),
+                (uint32)((tbl_sz - dst) * sizeof(uint32)),
+                elem->func_indexes + src, (uint32)(len * sizeof(uint32)));
+
+    return 0;
+out_of_bounds:
+    wasm_set_exception(inst, "out of bounds table access");
+    return -1;
+}
+
+bool
+jit_compile_op_table_init(JitCompContext *cc, uint32 tbl_idx,
+                          uint32 tbl_seg_idx)
+{
+    JitReg len, src, dst, res;
+    JitReg args[6] = { 0 };
+
+    POP_I32(len);
+    POP_I32(src);
+    POP_I32(dst);
+
+    res = jit_cc_new_reg_I32(cc);
+    args[0] = get_module_inst_reg(cc->jit_frame);
+    args[1] = NEW_CONST(I32, tbl_idx);
+    args[2] = NEW_CONST(I32, tbl_seg_idx);
+    args[3] = dst;
+    args[4] = len;
+    args[5] = src;
+
+    if (!jit_emit_callnative(cc, wasm_init_table, res, args,
+                             sizeof(args) / sizeof(args[0])))
+        goto fail;
+
+    GEN_INSN(CMP, cc->cmp_reg, res, NEW_CONST(I32, 0));
+    if (!jit_emit_exception(cc, JIT_EXCE_ALREADY_THROWN, JIT_OP_BLTS,
+                            cc->cmp_reg, NULL))
+        goto fail;
+
+    return true;
+fail:
+    return false;
+}
+
+static int
+wasm_copy_table(WASMModuleInstance *inst, uint32 src_tbl_idx,
+                uint32 dst_tbl_idx, uint32 dst_offset, uint32 len,
+                uint32 src_offset)
+{
+    WASMTableInstance *src_tbl, *dst_tbl;
+    uint32 src_tbl_sz, dst_tbl_sz;
+
+    src_tbl = inst->tables[src_tbl_idx];
+    src_tbl_sz = src_tbl->cur_size;
+    if (src_offset > src_tbl_sz || src_tbl_sz - src_offset < len)
+        goto out_of_bounds;
+
+    dst_tbl = inst->tables[dst_tbl_idx];
+    dst_tbl_sz = dst_tbl->cur_size;
+    if (dst_offset > dst_tbl_sz || dst_tbl_sz - dst_offset < len)
+        goto out_of_bounds;
+
+    bh_memmove_s((uint8 *)(dst_tbl) + offsetof(WASMTableInstance, base_addr)
+                     + dst_offset * sizeof(uint32),
+                 (uint32)((dst_tbl_sz - dst_offset) * sizeof(uint32)),
+                 (uint8 *)(src_tbl) + offsetof(WASMTableInstance, base_addr)
+                     + src_offset * sizeof(uint32),
+                 (uint32)(len * sizeof(uint32)));
+
+    return 0;
+out_of_bounds:
+    wasm_set_exception(inst, "out of bounds table access");
+    return -1;
+}
+
+bool
+jit_compile_op_table_copy(JitCompContext *cc, uint32 src_tbl_idx,
+                          uint32 dst_tbl_idx)
+{
+    JitReg len, src, dst, res;
+    JitReg args[6] = { 0 };
+
+    POP_I32(len);
+    POP_I32(src);
+    POP_I32(dst);
+
+    res = jit_cc_new_reg_I32(cc);
+    args[0] = get_module_inst_reg(cc->jit_frame);
+    args[1] = NEW_CONST(I32, src_tbl_idx);
+    args[2] = NEW_CONST(I32, dst_tbl_idx);
+    args[3] = dst;
+    args[4] = len;
+    args[5] = src;
+
+    if (!jit_emit_callnative(cc, wasm_copy_table, res, args,
+                             sizeof(args) / sizeof(args[0])))
+        goto fail;
+
+    GEN_INSN(CMP, cc->cmp_reg, res, NEW_CONST(I32, 0));
+    if (!jit_emit_exception(cc, JIT_EXCE_ALREADY_THROWN, JIT_OP_BLTS,
+                            cc->cmp_reg, NULL))
+        goto fail;
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_table_size(JitCompContext *cc, uint32 tbl_idx)
+{
+    JitReg res;
+
+    res = get_table_cur_size_reg(cc->jit_frame, tbl_idx);
+    PUSH_I32(res);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_table_grow(JitCompContext *cc, uint32 tbl_idx)
+{
+    JitReg tbl_sz, n, val, enlarge_ret, res;
+    JitReg args[4] = { 0 };
+
+    POP_I32(n);
+    POP_I32(val);
+
+    tbl_sz = get_table_cur_size_reg(cc->jit_frame, tbl_idx);
+
+    enlarge_ret = jit_cc_new_reg_I32(cc);
+    args[0] = get_module_inst_reg(cc->jit_frame);
+    args[1] = NEW_CONST(I32, tbl_idx);
+    args[2] = n;
+    args[3] = val;
+
+    if (!jit_emit_callnative(cc, wasm_enlarge_table, enlarge_ret, args,
+                             sizeof(args) / sizeof(args[0])))
+        goto fail;
+
+    /* Convert bool to uint32 */
+    GEN_INSN(AND, enlarge_ret, enlarge_ret, NEW_CONST(I32, 0xFF));
+
+    res = jit_cc_new_reg_I32(cc);
+    GEN_INSN(CMP, cc->cmp_reg, enlarge_ret, NEW_CONST(I32, 1));
+    GEN_INSN(SELECTEQ, res, cc->cmp_reg, tbl_sz, NEW_CONST(I32, -1));
+    PUSH_I32(res);
+
+    /* Ensure the table registers are refreshed the next time they
+       are fetched */
+    clear_table_regs(cc->jit_frame);
+    return true;
+fail:
+    return false;
+}
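+
+/* Per the wasm spec, table.grow pushes the previous table size on success
+   and -1 on failure; the SELECTEQ above implements exactly that, e.g.
+   growing a table of size 4 by 2 pushes 4, while a failed grow pushes -1
+   (0xffffffff). */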
+
+static int
+wasm_fill_table(WASMModuleInstance *inst, uint32 tbl_idx, uint32 dst,
+                uint32 val, uint32 len)
+{
+    WASMTableInstance *tbl;
+    uint32 tbl_sz;
+
+    tbl = inst->tables[tbl_idx];
+    tbl_sz = tbl->cur_size;
+
+    if (dst > tbl_sz || tbl_sz - dst < len)
+        goto out_of_bounds;
+
+    for (; len != 0; dst++, len--) {
+        ((uint32 *)(tbl->base_addr))[dst] = val;
+    }
+
+    return 0;
+out_of_bounds:
+    wasm_set_exception(inst, "out of bounds table access");
+    return -1;
+}
+
+bool
+jit_compile_op_table_fill(JitCompContext *cc, uint32 tbl_idx)
+{
+    JitReg len, val, dst, res;
+    JitReg args[5] = { 0 };
+
+    POP_I32(len);
+    POP_I32(val);
+    POP_I32(dst);
+
+    res = jit_cc_new_reg_I32(cc);
+    args[0] = get_module_inst_reg(cc->jit_frame);
+    args[1] = NEW_CONST(I32, tbl_idx);
+    args[2] = dst;
+    args[3] = val;
+    args[4] = len;
+
+    if (!jit_emit_callnative(cc, wasm_fill_table, res, args,
+                             sizeof(args) / sizeof(args[0])))
+        goto fail;
+
+    GEN_INSN(CMP, cc->cmp_reg, res, NEW_CONST(I32, 0));
+    if (!jit_emit_exception(cc, JIT_EXCE_ALREADY_THROWN, JIT_OP_BLTS,
+                            cc->cmp_reg, NULL))
+        goto fail;
+
+    return true;
+fail:
+    return false;
+}
+#endif

+ 47 - 0
core/iwasm/fast-jit/fe/jit_emit_table.h

@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_TABLE_H_
+#define _JIT_EMIT_TABLE_H_
+
+#include "../jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if WASM_ENABLE_REF_TYPES != 0
+bool
+jit_compile_op_elem_drop(JitCompContext *cc, uint32 tbl_seg_idx);
+
+bool
+jit_compile_op_table_get(JitCompContext *cc, uint32 tbl_idx);
+
+bool
+jit_compile_op_table_set(JitCompContext *cc, uint32 tbl_idx);
+
+bool
+jit_compile_op_table_init(JitCompContext *cc, uint32 tbl_idx,
+                          uint32 tbl_seg_idx);
+
+bool
+jit_compile_op_table_copy(JitCompContext *cc, uint32 src_tbl_idx,
+                          uint32 dst_tbl_idx);
+
+bool
+jit_compile_op_table_size(JitCompContext *cc, uint32 tbl_idx);
+
+bool
+jit_compile_op_table_grow(JitCompContext *cc, uint32 tbl_idx);
+
+bool
+jit_compile_op_table_fill(JitCompContext *cc, uint32 tbl_idx);
+#endif
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+#endif

+ 323 - 0
core/iwasm/fast-jit/fe/jit_emit_variable.c

@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_emit_variable.h"
+#include "jit_emit_exception.h"
+#include "../jit_frontend.h"
+
+#define CHECK_LOCAL(idx)                                                     \
+    do {                                                                     \
+        if (idx                                                              \
+            >= wasm_func->func_type->param_count + wasm_func->local_count) { \
+            jit_set_last_error(cc, "local index out of range");              \
+            goto fail;                                                       \
+        }                                                                    \
+    } while (0)
+
+static uint8
+get_local_type(const WASMFunction *wasm_func, uint32 local_idx)
+{
+    uint32 param_count = wasm_func->func_type->param_count;
+    return local_idx < param_count
+               ? wasm_func->func_type->types[local_idx]
+               : wasm_func->local_types[local_idx - param_count];
+}
+
+bool
+jit_compile_op_get_local(JitCompContext *cc, uint32 local_idx)
+{
+    WASMFunction *wasm_func = cc->cur_wasm_func;
+    uint16 *local_offsets = wasm_func->local_offsets;
+    uint16 local_offset;
+    uint8 local_type;
+    JitReg value = 0;
+
+    CHECK_LOCAL(local_idx);
+
+    local_offset = local_offsets[local_idx];
+    local_type = get_local_type(wasm_func, local_idx);
+
+    switch (local_type) {
+        case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+        case VALUE_TYPE_EXTERNREF:
+        case VALUE_TYPE_FUNCREF:
+#endif
+            value = local_i32(cc->jit_frame, local_offset);
+
+            break;
+        case VALUE_TYPE_I64:
+            value = local_i64(cc->jit_frame, local_offset);
+            break;
+        case VALUE_TYPE_F32:
+            value = local_f32(cc->jit_frame, local_offset);
+            break;
+        case VALUE_TYPE_F64:
+            value = local_f64(cc->jit_frame, local_offset);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    PUSH(value, local_type);
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_set_local(JitCompContext *cc, uint32 local_idx)
+{
+    WASMFunction *wasm_func = cc->cur_wasm_func;
+    uint16 *local_offsets = wasm_func->local_offsets;
+    uint16 local_offset;
+    uint8 local_type;
+    JitReg value;
+
+    CHECK_LOCAL(local_idx);
+
+    local_offset = local_offsets[local_idx];
+    local_type = get_local_type(wasm_func, local_idx);
+
+    switch (local_type) {
+        case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+        case VALUE_TYPE_EXTERNREF:
+        case VALUE_TYPE_FUNCREF:
+#endif
+            POP_I32(value);
+            set_local_i32(cc->jit_frame, local_offset, value);
+            break;
+        case VALUE_TYPE_I64:
+            POP_I64(value);
+            set_local_i64(cc->jit_frame, local_offset, value);
+            break;
+        case VALUE_TYPE_F32:
+            POP_F32(value);
+            set_local_f32(cc->jit_frame, local_offset, value);
+            break;
+        case VALUE_TYPE_F64:
+            POP_F64(value);
+            set_local_f64(cc->jit_frame, local_offset, value);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_tee_local(JitCompContext *cc, uint32 local_idx)
+{
+    WASMFunction *wasm_func = cc->cur_wasm_func;
+    uint16 *local_offsets = wasm_func->local_offsets;
+    uint16 local_offset;
+    uint8 local_type;
+    JitReg value = 0;
+
+    CHECK_LOCAL(local_idx);
+
+    local_offset = local_offsets[local_idx];
+    local_type = get_local_type(wasm_func, local_idx);
+
+    switch (local_type) {
+        case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+        case VALUE_TYPE_EXTERNREF:
+        case VALUE_TYPE_FUNCREF:
+#endif
+            POP_I32(value);
+            set_local_i32(cc->jit_frame, local_offset, value);
+            PUSH_I32(value);
+            break;
+        case VALUE_TYPE_I64:
+            POP_I64(value);
+            set_local_i64(cc->jit_frame, local_offset, value);
+            PUSH_I64(value);
+            break;
+        case VALUE_TYPE_F32:
+            POP_F32(value);
+            set_local_f32(cc->jit_frame, local_offset, value);
+            PUSH_F32(value);
+            break;
+        case VALUE_TYPE_F64:
+            POP_F64(value);
+            set_local_f64(cc->jit_frame, local_offset, value);
+            PUSH_F64(value);
+            break;
+        default:
+            bh_assert(0);
+            goto fail;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+static uint8
+get_global_type(const WASMModule *module, uint32 global_idx)
+{
+    if (global_idx < module->import_global_count) {
+        const WASMGlobalImport *import_global =
+            &((module->import_globals + global_idx)->u.global);
+        return import_global->type;
+    }
+    else {
+        const WASMGlobal *global =
+            module->globals + (global_idx - module->import_global_count);
+        return global->type;
+    }
+}
+
+static uint32
+get_global_data_offset(const WASMModule *module, uint32 global_idx)
+{
+    if (global_idx < module->import_global_count) {
+        const WASMGlobalImport *import_global =
+            &((module->import_globals + global_idx)->u.global);
+        return import_global->data_offset;
+    }
+    else {
+        const WASMGlobal *global =
+            module->globals + (global_idx - module->import_global_count);
+        return global->data_offset;
+    }
+}
+
+bool
+jit_compile_op_get_global(JitCompContext *cc, uint32 global_idx)
+{
+    uint32 data_offset;
+    uint8 global_type = 0;
+    JitReg value = 0;
+
+    bh_assert(global_idx < cc->cur_wasm_module->import_global_count
+                               + cc->cur_wasm_module->global_count);
+
+    data_offset = get_global_data_offset(cc->cur_wasm_module, global_idx);
+    global_type = get_global_type(cc->cur_wasm_module, global_idx);
+    switch (global_type) {
+        case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+        case VALUE_TYPE_EXTERNREF:
+        case VALUE_TYPE_FUNCREF:
+#endif
+        {
+            value = jit_cc_new_reg_I32(cc);
+            GEN_INSN(LDI32, value, get_global_data_reg(cc->jit_frame),
+                     NEW_CONST(I32, data_offset));
+            break;
+        }
+        case VALUE_TYPE_I64:
+        {
+            value = jit_cc_new_reg_I64(cc);
+            GEN_INSN(LDI64, value, get_global_data_reg(cc->jit_frame),
+                     NEW_CONST(I32, data_offset));
+            break;
+        }
+        case VALUE_TYPE_F32:
+        {
+            value = jit_cc_new_reg_F32(cc);
+            GEN_INSN(LDF32, value, get_global_data_reg(cc->jit_frame),
+                     NEW_CONST(I32, data_offset));
+            break;
+        }
+        case VALUE_TYPE_F64:
+        {
+            value = jit_cc_new_reg_F64(cc);
+            GEN_INSN(LDF64, value, get_global_data_reg(cc->jit_frame),
+                     NEW_CONST(I32, data_offset));
+            break;
+        }
+        default:
+        {
+            jit_set_last_error(cc, "unexpected global type");
+            goto fail;
+        }
+    }
+
+    PUSH(value, global_type);
+
+    return true;
+fail:
+    return false;
+}
+
+bool
+jit_compile_op_set_global(JitCompContext *cc, uint32 global_idx,
+                          bool is_aux_stack)
+{
+    uint32 data_offset;
+    uint8 global_type = 0;
+    JitReg value = 0;
+
+    bh_assert(global_idx < cc->cur_wasm_module->import_global_count
+                               + cc->cur_wasm_module->global_count);
+
+    data_offset = get_global_data_offset(cc->cur_wasm_module, global_idx);
+    global_type = get_global_type(cc->cur_wasm_module, global_idx);
+    switch (global_type) {
+        case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+        case VALUE_TYPE_EXTERNREF:
+        case VALUE_TYPE_FUNCREF:
+#endif
+        {
+            POP_I32(value);
+            if (is_aux_stack) {
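+                /* The aux stack grows downward: the new stack top must
+                   stay above the bound and must not exceed the bottom,
+                   otherwise the checks below throw overflow/underflow */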
+                JitReg aux_stack_bound = get_aux_stack_bound_reg(cc->jit_frame);
+                JitReg aux_stack_bottom =
+                    get_aux_stack_bottom_reg(cc->jit_frame);
+                GEN_INSN(CMP, cc->cmp_reg, value, aux_stack_bound);
+                if (!(jit_emit_exception(cc, JIT_EXCE_AUX_STACK_OVERFLOW,
+                                         JIT_OP_BLEU, cc->cmp_reg, NULL)))
+                    goto fail;
+                GEN_INSN(CMP, cc->cmp_reg, value, aux_stack_bottom);
+                if (!(jit_emit_exception(cc, JIT_EXCE_AUX_STACK_UNDERFLOW,
+                                         JIT_OP_BGTU, cc->cmp_reg, NULL)))
+                    goto fail;
+            }
+            GEN_INSN(STI32, value, get_global_data_reg(cc->jit_frame),
+                     NEW_CONST(I32, data_offset));
+            break;
+        }
+        case VALUE_TYPE_I64:
+        {
+            POP_I64(value);
+            GEN_INSN(STI64, value, get_global_data_reg(cc->jit_frame),
+                     NEW_CONST(I32, data_offset));
+            break;
+        }
+        case VALUE_TYPE_F32:
+        {
+            POP_F32(value);
+            GEN_INSN(STF32, value, get_global_data_reg(cc->jit_frame),
+                     NEW_CONST(I32, data_offset));
+            break;
+        }
+        case VALUE_TYPE_F64:
+        {
+            POP_F64(value);
+            GEN_INSN(STF64, value, get_global_data_reg(cc->jit_frame),
+                     NEW_CONST(I32, data_offset));
+            break;
+        }
+        default:
+        {
+            jit_set_last_error(cc, "unexpected global type");
+            goto fail;
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}

+ 35 - 0
core/iwasm/fast-jit/fe/jit_emit_variable.h

@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_EMIT_VARIABLE_H_
+#define _JIT_EMIT_VARIABLE_H_
+
+#include "../jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_compile_op_get_local(JitCompContext *cc, uint32 local_idx);
+
+bool
+jit_compile_op_set_local(JitCompContext *cc, uint32 local_idx);
+
+bool
+jit_compile_op_tee_local(JitCompContext *cc, uint32 local_idx);
+
+bool
+jit_compile_op_get_global(JitCompContext *cc, uint32 global_idx);
+
+bool
+jit_compile_op_set_global(JitCompContext *cc, uint32 global_idx,
+                          bool is_aux_stack);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* end of _JIT_EMIT_VARIABLE_H_ */

+ 95 - 0
core/iwasm/fast-jit/iwasm_fast_jit.cmake

@@ -0,0 +1,95 @@
+# Copyright (C) 2019 Intel Corporation.  All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+set (IWASM_FAST_JIT_DIR ${CMAKE_CURRENT_LIST_DIR})
+
+add_definitions(-DWASM_ENABLE_FAST_JIT=1)
+if (WAMR_BUILD_FAST_JIT_DUMP EQUAL 1)
+    add_definitions(-DWASM_ENABLE_FAST_JIT_DUMP=1)
+endif ()
+
+include_directories (${IWASM_FAST_JIT_DIR})
+
+if (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
+    include(FetchContent)
+    if (NOT WAMR_BUILD_PLATFORM STREQUAL "linux-sgx")
+        FetchContent_Declare(
+            asmjit
+            GIT_REPOSITORY https://github.com/asmjit/asmjit.git
+        )
+    else ()
+        FetchContent_Declare(
+            asmjit
+            GIT_REPOSITORY https://github.com/asmjit/asmjit.git
+            PATCH_COMMAND  git apply ${IWASM_FAST_JIT_DIR}/asmjit_sgx_patch.diff
+        )
+    endif ()
+    FetchContent_GetProperties(asmjit)
+    if (NOT asmjit_POPULATED)
+        message ("-- Fetching asmjit ..")
+        FetchContent_Populate(asmjit)
+        add_definitions(-DASMJIT_STATIC)
+        add_definitions(-DASMJIT_NO_DEPRECATED)
+        add_definitions(-DASMJIT_NO_BUILDER)
+        add_definitions(-DASMJIT_NO_COMPILER)
+        add_definitions(-DASMJIT_NO_JIT)
+        add_definitions(-DASMJIT_NO_LOGGING)
+        add_definitions(-DASMJIT_NO_TEXT)
+        add_definitions(-DASMJIT_NO_VALIDATION)
+        add_definitions(-DASMJIT_NO_INTROSPECTION)
+        add_definitions(-DASMJIT_NO_INTRINSICS)
+        add_definitions(-DASMJIT_NO_AARCH64)
+        add_definitions(-DASMJIT_NO_AARCH32)
+        include_directories("${asmjit_SOURCE_DIR}/src")
+        add_subdirectory(${asmjit_SOURCE_DIR} ${asmjit_BINARY_DIR} EXCLUDE_FROM_ALL)
+        file (GLOB_RECURSE cpp_source_asmjit
+            ${asmjit_SOURCE_DIR}/src/asmjit/core/*.cpp
+            ${asmjit_SOURCE_DIR}/src/asmjit/x86/*.cpp
+        )
+    endif ()
+    if (WAMR_BUILD_FAST_JIT_DUMP EQUAL 1)
+        FetchContent_Declare(
+            zycore
+            GIT_REPOSITORY https://github.com/zyantific/zycore-c.git
+        )
+        FetchContent_GetProperties(zycore)
+        if (NOT zycore_POPULATED)
+            message ("-- Fetching zycore ..")
+            FetchContent_Populate(zycore)
+            option(ZYDIS_BUILD_TOOLS "" OFF)
+            option(ZYDIS_BUILD_EXAMPLES "" OFF)
+            include_directories("${zycore_SOURCE_DIR}/include")
+            include_directories("${zycore_BINARY_DIR}")
+            add_subdirectory(${zycore_SOURCE_DIR} ${zycore_BINARY_DIR} EXCLUDE_FROM_ALL)
+            file (GLOB_RECURSE c_source_zycore ${zycore_SOURCE_DIR}/src/*.c)
+        endif ()
+        FetchContent_Declare(
+            zydis
+            GIT_REPOSITORY https://github.com/zyantific/zydis.git
+            GIT_TAG e14a07895136182a5b53e181eec3b1c6e0b434de
+        )
+        FetchContent_GetProperties(zydis)
+        if (NOT zydis_POPULATED)
+            message ("-- Fetching zydis ..")
+            FetchContent_Populate(zydis)
+            option(ZYDIS_BUILD_TOOLS "" OFF)
+            option(ZYDIS_BUILD_EXAMPLES "" OFF)
+            include_directories("${zydis_BINARY_DIR}")
+            include_directories("${zydis_SOURCE_DIR}/include")
+            include_directories("${zydis_SOURCE_DIR}/src")
+            add_subdirectory(${zydis_SOURCE_DIR} ${zydis_BINARY_DIR} EXCLUDE_FROM_ALL)
+            file (GLOB_RECURSE c_source_zydis ${zydis_SOURCE_DIR}/src/*.c)
+        endif ()
+    endif ()
+endif ()
+
+file (GLOB c_source_jit ${IWASM_FAST_JIT_DIR}/*.c ${IWASM_FAST_JIT_DIR}/fe/*.c)
+
+if (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
+  file (GLOB_RECURSE cpp_source_jit_cg ${IWASM_FAST_JIT_DIR}/cg/x86-64/*.cpp)
+else ()
+  message (FATAL_ERROR "Fast JIT codegen for target ${WAMR_BUILD_TARGET} isn't implemented")
+endif ()
+
+set (IWASM_FAST_JIT_SOURCE ${c_source_jit} ${cpp_source_jit_cg}
+                           ${cpp_source_asmjit} ${c_source_zycore} ${c_source_zydis})
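+
+# A typical build enabling Fast JIT might look like (illustrative; the
+# WAMR_BUILD_FAST_JIT option name is an assumption based on the product
+# CMake configuration, while WAMR_BUILD_FAST_JIT_DUMP is handled above):
+#   cmake .. -DWAMR_BUILD_TARGET=X86_64 -DWAMR_BUILD_FAST_JIT=1 \
+#            -DWAMR_BUILD_FAST_JIT_DUMP=1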

+ 65 - 0
core/iwasm/fast-jit/jit_codecache.c

@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_codecache.h"
+#include "mem_alloc.h"
+#include "jit_compiler.h"
+
+static void *code_cache_pool = NULL;
+static uint32 code_cache_pool_size = 0;
+static mem_allocator_t code_cache_pool_allocator = NULL;
+
+bool
+jit_code_cache_init(uint32 code_cache_size)
+{
+    int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC;
+    int map_flags = MMAP_MAP_NONE;
+
+    if (!(code_cache_pool =
+              os_mmap(NULL, code_cache_size, map_prot, map_flags))) {
+        return false;
+    }
+
+    if (!(code_cache_pool_allocator =
+              mem_allocator_create(code_cache_pool, code_cache_size))) {
+        os_munmap(code_cache_pool, code_cache_size);
+        code_cache_pool = NULL;
+        return false;
+    }
+
+    code_cache_pool_size = code_cache_size;
+    return true;
+}
+
+void
+jit_code_cache_destroy()
+{
+    mem_allocator_destroy(code_cache_pool_allocator);
+    os_munmap(code_cache_pool, code_cache_pool_size);
+}
+
+void *
+jit_code_cache_alloc(uint32 size)
+{
+    return mem_allocator_malloc(code_cache_pool_allocator, size);
+}
+
+void
+jit_code_cache_free(void *ptr)
+{
+    if (ptr)
+        mem_allocator_free(code_cache_pool_allocator, ptr);
+}
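+
+/* Typical lifecycle of the code cache (illustrative):
+       jit_code_cache_init(FAST_JIT_DEFAULT_CODE_CACHE_SIZE);
+       void *code = jit_code_cache_alloc(native_code_size);
+       ... copy emitted native code into the buffer ...
+       jit_code_cache_free(code);
+       jit_code_cache_destroy();
+   The whole pool is mapped read/write/execute up front, so allocated
+   chunks are directly executable. */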
+
+bool
+jit_pass_register_jitted_code(JitCompContext *cc)
+{
+    uint32 jit_func_idx =
+        cc->cur_wasm_func_idx - cc->cur_wasm_module->import_function_count;
+    cc->cur_wasm_func->fast_jit_jitted_code = cc->jitted_addr_begin;
+    cc->cur_wasm_module->fast_jit_func_ptrs[jit_func_idx] =
+        cc->jitted_addr_begin;
+    return true;
+}

+ 31 - 0
core/iwasm/fast-jit/jit_codecache.h

@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_CODE_CACHE_H_
+#define _JIT_CODE_CACHE_H_
+
+#include "bh_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool
+jit_code_cache_init(uint32 code_cache_size);
+
+void
+jit_code_cache_destroy();
+
+void *
+jit_code_cache_alloc(uint32 size);
+
+void
+jit_code_cache_free(void *ptr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _JIT_CODE_CACHE_H_ */

+ 22 - 0
core/iwasm/fast-jit/jit_codegen.c

@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_compiler.h"
+#include "jit_codegen.h"
+
+bool
+jit_pass_lower_cg(JitCompContext *cc)
+{
+    return jit_codegen_lower(cc);
+}
+
+bool
+jit_pass_codegen(JitCompContext *cc)
+{
+    if (!jit_annl_enable_jitted_addr(cc))
+        return false;
+
+    return jit_codegen_gen_native(cc);
+}

+ 84 - 0
core/iwasm/fast-jit/jit_codegen.h

@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_CODEGEN_H_
+#define _JIT_CODEGEN_H_
+
+#include "bh_platform.h"
+#include "jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Initialize the codegen module, such as the instruction encoder.
+ *
+ * @return true if succeeded; false if failed.
+ */
+bool
+jit_codegen_init();
+
+/**
+ * Destroy the codegen module, such as the instruction encoder.
+ */
+void
+jit_codegen_destroy();
+
+/**
+ * Get hard register information of each kind.
+ *
+ * @return the JitHardRegInfo array of each kind
+ */
+const JitHardRegInfo *
+jit_codegen_get_hreg_info();
+
+/**
+ * Get hard register by name.
+ *
+ * @param name the name of the hard register
+ *
+ * @return the hard register of the name
+ */
+JitReg
+jit_codegen_get_hreg_by_name(const char *name);
+
+/**
+ * Generate native code for the given compilation context
+ *
+ * @param cc the compilation context that is ready to do codegen
+ *
+ * @return true if succeeds, false otherwise
+ */
+bool
+jit_codegen_gen_native(JitCompContext *cc);
+
+/**
+ * Lower unsupported operations into supported ones for the target.
+ *
+ * @param cc the compilation context that is ready to do codegen
+ *
+ * @return true if succeeds, false otherwise
+ */
+bool
+jit_codegen_lower(JitCompContext *cc);
+
+/**
+ * Dump native code in the given range to assembly.
+ *
+ * @param begin_addr begin address of the native code
+ * @param end_addr end address of the native code
+ */
+void
+jit_codegen_dump_native(void *begin_addr, void *end_addr);
+
+int
+jit_codegen_interp_jitted_glue(void *self, JitInterpSwitchInfo *info, void *pc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _JIT_CODEGEN_H_ */

+ 176 - 0
core/iwasm/fast-jit/jit_compiler.c

@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_compiler.h"
+#include "jit_ir.h"
+#include "jit_codegen.h"
+#include "jit_codecache.h"
+#include "../interpreter/wasm.h"
+
+typedef struct JitCompilerPass {
+    /* Name of the pass.  */
+    const char *name;
+    /* The entry of the compiler pass.  */
+    bool (*run)(JitCompContext *cc);
+} JitCompilerPass;
+
+/* clang-format off */
+static JitCompilerPass compiler_passes[] = {
+    { NULL, NULL },
+#define REG_PASS(name) { #name, jit_pass_##name }
+    REG_PASS(dump),
+    REG_PASS(update_cfg),
+    REG_PASS(frontend),
+    REG_PASS(lower_cg),
+    REG_PASS(regalloc),
+    REG_PASS(codegen),
+    REG_PASS(register_jitted_code)
+#undef REG_PASS
+};
+
+/* Number of compiler passes.  */
+#define COMPILER_PASS_NUM (sizeof(compiler_passes) / sizeof(compiler_passes[0]))
+
+#if WASM_ENABLE_FAST_JIT_DUMP == 0
+static const uint8 compiler_passes_without_dump[] = {
+    3, 4, 5, 6, 7, 0
+};
+#else
+static const uint8 compiler_passes_with_dump[] = {
+    3, 2, 1, 4, 1, 5, 1, 6, 1, 7, 0
+};
+#endif
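+
+/* Pass indexes refer to compiler_passes[] above: 1 = dump, 2 = update_cfg,
+   3 = frontend, 4 = lower_cg, 5 = regalloc, 6 = codegen and
+   7 = register_jitted_code; e.g. with dump enabled the pipeline runs
+   frontend, update_cfg, dump, then dumps again after each of lower_cg,
+   regalloc and codegen. */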
+
+/* The exported global data of JIT compiler.  */
+static JitGlobals jit_globals = {
+#if WASM_ENABLE_FAST_JIT_DUMP == 0
+    .passes = compiler_passes_without_dump,
+#else
+    .passes = compiler_passes_with_dump,
+#endif
+    .return_to_interp_from_jitted = NULL
+};
+/* clang-format on */
+
+static bool
+apply_compiler_passes(JitCompContext *cc)
+{
+    const uint8 *p = jit_globals.passes;
+
+    for (; *p; p++) {
+        /* Set the pass number.  */
+        cc->cur_pass_no = p - jit_globals.passes;
+        bh_assert(*p < COMPILER_PASS_NUM);
+
+        if (!compiler_passes[*p].run(cc)) {
+            LOG_VERBOSE("JIT: compilation failed at pass[%td] = %s\n",
+                        p - jit_globals.passes, compiler_passes[*p].name);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+bool
+jit_compiler_init(const JitCompOptions *options)
+{
+    uint32 code_cache_size = options->code_cache_size > 0
+                                 ? options->code_cache_size
+                                 : FAST_JIT_DEFAULT_CODE_CACHE_SIZE;
+
+    LOG_VERBOSE("JIT: compiler init with code cache size: %u\n",
+                code_cache_size);
+
+    if (!jit_code_cache_init(code_cache_size))
+        return false;
+
+    if (!jit_codegen_init())
+        goto fail1;
+
+    return true;
+
+fail1:
+    jit_code_cache_destroy();
+    return false;
+}
+
+void
+jit_compiler_destroy()
+{
+    jit_codegen_destroy();
+
+    jit_code_cache_destroy();
+}
+
+JitGlobals *
+jit_compiler_get_jit_globals()
+{
+    return &jit_globals;
+}
+
+const char *
+jit_compiler_get_pass_name(unsigned i)
+{
+    return i < COMPILER_PASS_NUM ? compiler_passes[i].name : NULL;
+}
+
+bool
+jit_compiler_compile(WASMModule *module, uint32 func_idx)
+{
+    JitCompContext *cc;
+    char *last_error;
+    bool ret = true;
+
+    /* Initialize compilation context.  */
+    if (!(cc = jit_calloc(sizeof(*cc))))
+        return false;
+
+    if (!jit_cc_init(cc, 64)) {
+        jit_free(cc);
+        return false;
+    }
+
+    cc->cur_wasm_module = module;
+    cc->cur_wasm_func =
+        module->functions[func_idx - module->import_function_count];
+    cc->cur_wasm_func_idx = func_idx;
+    cc->mem_space_unchanged = (!cc->cur_wasm_func->has_op_memory_grow
+                               && !cc->cur_wasm_func->has_op_func_call)
+                              || (!module->possible_memory_grow);
+
+    /* Apply compiler passes.  */
+    if (!apply_compiler_passes(cc) || jit_get_last_error(cc)) {
+        last_error = jit_get_last_error(cc);
+        os_printf("fast jit compilation failed: %s\n",
+                  last_error ? last_error : "unknown error");
+        ret = false;
+    }
+
+    /* Delete the compilation context.  */
+    jit_cc_delete(cc);
+
+    return ret;
+}
+
+bool
+jit_compiler_compile_all(WASMModule *module)
+{
+    uint32 i;
+
+    for (i = 0; i < module->function_count; i++) {
+        if (!jit_compiler_compile(module, module->import_function_count + i)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+int
+jit_interp_switch_to_jitted(void *exec_env, JitInterpSwitchInfo *info, void *pc)
+{
+    return jit_codegen_interp_jitted_glue(exec_env, info, pc);
+}

+ 143 - 0
core/iwasm/fast-jit/jit_compiler.h

@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_COMPILER_H_
+#define _JIT_COMPILER_H_
+
+#include "bh_platform.h"
+#include "../interpreter/wasm_runtime.h"
+#include "jit_ir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct JitGlobals {
+    /* Compiler pass sequence, the last element must be 0 */
+    const uint8 *passes;
+    char *return_to_interp_from_jitted;
+} JitGlobals;
+
+/**
+ * Actions the interpreter should do when jitted code returns to
+ * interpreter.
+ */
+typedef enum JitInterpAction {
+    JIT_INTERP_ACTION_NORMAL, /* normal execution */
+    JIT_INTERP_ACTION_THROWN, /* exception was thrown */
+    JIT_INTERP_ACTION_CALL    /* call wasm function */
+} JitInterpAction;
+
+/**
+ * Information exchanged between jitted code and interpreter.
+ */
+typedef struct JitInterpSwitchInfo {
+    /* Points to the frame that is passed to jitted code and the frame
+       that is returned from jitted code */
+    void *frame;
+
+    /* Output values from jitted code of different actions */
+    union {
+        /* IP and SP offsets for NORMAL */
+        struct {
+            int32 ip;
+            int32 sp;
+        } normal;
+
+        /* Function called from jitted code for CALL */
+        struct {
+            void *function;
+        } call;
+
+        /* Returned integer and/or floating point values when the
+           function returns. This is also used to pass return values
+           from the interpreter to jitted code when the caller is in
+           jitted code and the callee is in the interpreter. */
+        struct {
+            uint32 ival[2];
+            uint32 fval[2];
+            uint32 last_return_type;
+        } ret;
+    } out;
+} JitInterpSwitchInfo;
+
+/* Jit compiler options */
+typedef struct JitCompOptions {
+    uint32 code_cache_size;
+    uint32 opt_level;
+} JitCompOptions;
+
+bool
+jit_compiler_init(const JitCompOptions *option);
+
+void
+jit_compiler_destroy();
+
+JitGlobals *
+jit_compiler_get_jit_globals();
+
+const char *
+jit_compiler_get_pass_name(unsigned i);
+
+bool
+jit_compiler_compile(WASMModule *module, uint32 func_idx);
+
+bool
+jit_compiler_compile_all(WASMModule *module);
+
+int
+jit_interp_switch_to_jitted(void *self, JitInterpSwitchInfo *info, void *pc);
+
+/*
+ * Pass declarations:
+ */
+
+/**
+ * Dump the compilation context.
+ */
+bool
+jit_pass_dump(JitCompContext *cc);
+
+/**
+ * Update CFG (usually before dump for better readability).
+ */
+bool
+jit_pass_update_cfg(JitCompContext *cc);
+
+/**
+ * Translate the function's WASM bytecode into the JIT IR.
+ */
+bool
+jit_pass_frontend(JitCompContext *cc);
+
+/**
+ * Lower unsupported operations into supported ones.
+ */
+bool
+jit_pass_lower_cg(JitCompContext *cc);
+
+/**
+ * Register allocation.
+ */
+bool
+jit_pass_regalloc(JitCompContext *cc);
+
+/**
+ * Native code generation.
+ */
+bool
+jit_pass_codegen(JitCompContext *cc);
+
+/**
+ * Register the jitted code so that it can be executed.
+ */
+bool
+jit_pass_register_jitted_code(JitCompContext *cc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _JIT_COMPILER_H_ */

+ 331 - 0
core/iwasm/fast-jit/jit_dump.c

@@ -0,0 +1,331 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_dump.h"
+#include "jit_compiler.h"
+#include "jit_codegen.h"
+
+void
+jit_dump_reg(JitCompContext *cc, JitReg reg)
+{
+    unsigned kind = jit_reg_kind(reg);
+    unsigned no = jit_reg_no(reg);
+
+    switch (kind) {
+        case JIT_REG_KIND_VOID:
+            os_printf("VOID");
+            break;
+
+        case JIT_REG_KIND_I32:
+            if (jit_reg_is_const(reg)) {
+                unsigned rel = jit_cc_get_const_I32_rel(cc, reg);
+
+                os_printf("0x%x", jit_cc_get_const_I32(cc, reg));
+
+                if (rel)
+                    os_printf("(rel: 0x%x)", rel);
+            }
+            else
+                os_printf("i%d", no);
+            break;
+
+        case JIT_REG_KIND_I64:
+            if (jit_reg_is_const(reg))
+                os_printf("0x%llxL", jit_cc_get_const_I64(cc, reg));
+            else
+                os_printf("I%d", no);
+            break;
+
+        case JIT_REG_KIND_F32:
+            if (jit_reg_is_const(reg))
+                os_printf("%f", jit_cc_get_const_F32(cc, reg));
+            else
+                os_printf("f%d", no);
+            break;
+
+        case JIT_REG_KIND_F64:
+            if (jit_reg_is_const(reg))
+                os_printf("%fL", jit_cc_get_const_F64(cc, reg));
+            else
+                os_printf("D%d", no);
+            break;
+
+        case JIT_REG_KIND_L32:
+            os_printf("L%d", no);
+            break;
+
+        default:
+            bh_assert(!"Unsupported register kind.");
+    }
+}
+
+static void
+jit_dump_insn_Reg(JitCompContext *cc, JitInsn *insn, unsigned opnd_num)
+{
+    unsigned i;
+
+    for (i = 0; i < opnd_num; i++) {
+        os_printf(i == 0 ? " " : ", ");
+        jit_dump_reg(cc, *(jit_insn_opnd(insn, i)));
+    }
+
+    os_printf("\n");
+}
+
+static void
+jit_dump_insn_VReg(JitCompContext *cc, JitInsn *insn, unsigned opnd_num)
+{
+    unsigned i;
+
+    /* For variable-operand instructions, the real operand count is
+       carried by the instruction itself rather than by the caller. */
+    opnd_num = jit_insn_opndv_num(insn);
+
+    for (i = 0; i < opnd_num; i++) {
+        os_printf(i == 0 ? " " : ", ");
+        jit_dump_reg(cc, *(jit_insn_opndv(insn, i)));
+    }
+
+    os_printf("\n");
+}
+
+static void
+jit_dump_insn_LookupSwitch(JitCompContext *cc, JitInsn *insn, unsigned opnd_num)
+{
+    unsigned i;
+    JitOpndLookupSwitch *opnd = jit_insn_opndls(insn);
+
+    os_printf(" ");
+    jit_dump_reg(cc, opnd->value);
+    os_printf("\n%16s: ", "default");
+    jit_dump_reg(cc, opnd->default_target);
+    os_printf("\n");
+
+    for (i = 0; i < opnd->match_pairs_num; i++) {
+        os_printf("%18d: ", opnd->match_pairs[i].value);
+        jit_dump_reg(cc, opnd->match_pairs[i].target);
+        os_printf("\n");
+    }
+}
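+
+/*
+ * For illustration, a LOOKUPSWITCH over i0 with one match pair and a
+ * default target dumps roughly as:
+ *
+ *     LOOKUPSWITCH    i0
+ *          default: L5
+ *                 1: L3
+ */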
+
+void
+jit_dump_insn(JitCompContext *cc, JitInsn *insn)
+{
+    switch (insn->opcode) {
+#define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE)     \
+    case JIT_OP_##NAME:                                \
+        os_printf("    %-15s", #NAME);                 \
+        jit_dump_insn_##OPND_KIND(cc, insn, OPND_NUM); \
+        break;
+#include "jit_ir.def"
+#undef INSN
+    }
+}
+
+void
+jit_dump_basic_block(JitCompContext *cc, JitBasicBlock *block)
+{
+    unsigned i, label_index;
+    void *begin_addr, *end_addr;
+    JitBasicBlock *block_next;
+    JitInsn *insn;
+    JitRegVec preds = jit_basic_block_preds(block);
+    JitRegVec succs = jit_basic_block_succs(block);
+    JitReg label = jit_basic_block_label(block), label_next;
+    JitReg *reg;
+
+    jit_dump_reg(cc, label);
+    os_printf(":\n    ; PREDS(");
+
+    JIT_REG_VEC_FOREACH(preds, i, reg)
+    {
+        if (i > 0)
+            os_printf(" ");
+        jit_dump_reg(cc, *reg);
+    }
+
+    os_printf(")\n    ;");
+
+    if (jit_annl_is_enabled_begin_bcip(cc))
+        os_printf(" BEGIN_BCIP=0x%04tx",
+                  *(jit_annl_begin_bcip(cc, label))
+                      - (uint8 *)cc->cur_wasm_module->load_addr);
+
+    if (jit_annl_is_enabled_end_bcip(cc))
+        os_printf(" END_BCIP=0x%04tx",
+                  *(jit_annl_end_bcip(cc, label))
+                      - (uint8 *)cc->cur_wasm_module->load_addr);
+    os_printf("\n");
+
+    if (jit_annl_is_enabled_jitted_addr(cc)) {
+        begin_addr = *(jit_annl_jitted_addr(cc, label));
+
+        if (label == cc->entry_label) {
+            /* The first body block is stored right after the entry and
+               exit slots of the label/basic-block annotation array. */
+            block_next = cc->_ann._label_basic_block[2];
+            label_next = jit_basic_block_label(block_next);
+            end_addr = *(jit_annl_jitted_addr(cc, label_next));
+        }
+        else if (label == cc->exit_label) {
+            end_addr = cc->jitted_addr_end;
+        }
+        else {
+            label_index = jit_reg_no(label);
+            if (label_index < jit_cc_label_num(cc) - 1)
+                block_next = cc->_ann._label_basic_block[label_index + 1];
+            else
+                block_next = cc->_ann._label_basic_block[1];
+            label_next = jit_basic_block_label(block_next);
+            end_addr = *(jit_annl_jitted_addr(cc, label_next));
+        }
+
+        jit_codegen_dump_native(begin_addr, end_addr);
+    }
+    else {
+        /* Dump IR.  */
+        JIT_FOREACH_INSN(block, insn) jit_dump_insn(cc, insn);
+    }
+
+    os_printf("    ; SUCCS(");
+
+    JIT_REG_VEC_FOREACH(succs, i, reg)
+    {
+        if (i > 0)
+            os_printf(" ");
+        jit_dump_reg(cc, *reg);
+    }
+
+    os_printf(")\n\n");
+}
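+
+/*
+ * A dumped block therefore looks roughly like:
+ *
+ *     L3:
+ *         ; PREDS(L2)
+ *         ; BEGIN_BCIP=0x0042 END_BCIP=0x0050
+ *         MOV             i1, i0
+ *         ; SUCCS(L4)
+ */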
+
+static void
+dump_func_name(JitCompContext *cc)
+{
+    const char *func_name = NULL;
+    WASMModule *module = cc->cur_wasm_module;
+
+#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0
+    func_name = cc->cur_wasm_func->field_name;
+#endif
+
+    /* If the custom name section was not generated, search the
+       export table for the function's symbol. */
+    if (!func_name) {
+        uint32 i;
+        for (i = 0; i < module->export_count; i++) {
+            if (module->exports[i].kind == EXPORT_KIND_FUNC
+                && module->exports[i].index == cc->cur_wasm_func_idx) {
+                func_name = module->exports[i].name;
+                break;
+            }
+        }
+    }
+
+    /* The function name is not exported; print the function index
+       instead. */
+    if (func_name == NULL) {
+        os_printf("$f%d", cc->cur_wasm_func_idx);
+    }
+    else {
+        os_printf("%s", func_name);
+    }
+}
+
+static void
+dump_cc_ir(JitCompContext *cc)
+{
+    unsigned i, end;
+    JitBasicBlock *block;
+    JitReg label;
+    const char *kind_names[] = { "VOID", "I32", "I64",  "F32",
+                                 "F64",  "V64", "V128", "V256" };
+
+    os_printf("; Function: ");
+    dump_func_name(cc);
+    os_printf("\n");
+
+    os_printf("; Constant table sizes:");
+
+    for (i = 0; i < JIT_REG_KIND_L32; i++)
+        os_printf(" %s=%d", kind_names[i], cc->_const_val._num[i]);
+
+    os_printf("\n; Label number: %d", jit_cc_label_num(cc));
+    os_printf("\n; Instruction number: %d", jit_cc_insn_num(cc));
+    os_printf("\n; Register numbers:");
+
+    for (i = 0; i < JIT_REG_KIND_L32; i++)
+        os_printf(" %s=%d", kind_names[i], jit_cc_reg_num(cc, i));
+
+    os_printf("\n; Label annotations:");
+#define ANN_LABEL(TYPE, NAME)           \
+    if (jit_annl_is_enabled_##NAME(cc)) \
+        os_printf(" %s", #NAME);
+#include "jit_ir.def"
+#undef ANN_LABEL
+
+    os_printf("\n; Instruction annotations:");
+#define ANN_INSN(TYPE, NAME)            \
+    if (jit_anni_is_enabled_##NAME(cc)) \
+        os_printf(" %s", #NAME);
+#include "jit_ir.def"
+#undef ANN_INSN
+
+    os_printf("\n; Register annotations:");
+#define ANN_REG(TYPE, NAME)             \
+    if (jit_annr_is_enabled_##NAME(cc)) \
+        os_printf(" %s", #NAME);
+#include "jit_ir.def"
+#undef ANN_REG
+
+    os_printf("\n\n");
+
+    if (jit_annl_is_enabled_next_label(cc)) {
+        /* Blocks have been reordered, use that order to dump.  */
+        for (label = cc->entry_label; label;
+             label = *(jit_annl_next_label(cc, label)))
+            jit_dump_basic_block(cc, *(jit_annl_basic_block(cc, label)));
+    }
+    else {
+        /* Otherwise, use the default order.  */
+        jit_dump_basic_block(cc, jit_cc_entry_basic_block(cc));
+
+        JIT_FOREACH_BLOCK(cc, i, end, block) jit_dump_basic_block(cc, block);
+
+        jit_dump_basic_block(cc, jit_cc_exit_basic_block(cc));
+    }
+}
+
+void
+jit_dump_cc(JitCompContext *cc)
+{
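+    /* A compilation context always has the entry and exit labels; when
+       no other labels exist, there are no body blocks to dump. */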
+    if (jit_cc_label_num(cc) <= 2)
+        return;
+
+    dump_cc_ir(cc);
+}
+
+bool
+jit_pass_dump(JitCompContext *cc)
+{
+    const JitGlobals *jit_globals = jit_compiler_get_jit_globals();
+    const uint8 *passes = jit_globals->passes;
+    uint8 pass_no = cc->cur_pass_no;
+    const char *pass_name =
+        pass_no > 0 ? jit_compiler_get_pass_name(passes[pass_no - 1]) : "NULL";
+
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+    if (!strcmp(pass_name, "lower_cg"))
+        /* Skip the lower codegen pass as it does nothing on x86-64 */
+        return true;
+#endif
+
+    os_printf("JIT.COMPILER.DUMP: PASS_NO=%d PREV_PASS=%s\n\n", pass_no,
+              pass_name);
+    jit_dump_cc(cc);
+    os_printf("\n");
+    return true;
+}
+
+bool
+jit_pass_update_cfg(JitCompContext *cc)
+{
+    return jit_cc_update_cfg(cc);
+}

+ 54 - 0
core/iwasm/fast-jit/jit_dump.h

@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_DUMP_H_
+#define _JIT_DUMP_H_
+
+#include "jit_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Dump a register.
+ *
+ * @param cc compilation context of the register
+ * @param reg register to be dumped
+ */
+void
+jit_dump_reg(JitCompContext *cc, JitReg reg);
+
+/**
+ * Dump an instruction.
+ *
+ * @param cc compilation context of the instruction
+ * @param insn instruction to be dumped
+ */
+void
+jit_dump_insn(JitCompContext *cc, JitInsn *insn);
+
+/**
+ * Dump a basic block.
+ *
+ * @param cc compilation context of the basic block
+ * @param block basic block to be dumped
+ */
+void
+jit_dump_basic_block(JitCompContext *cc, JitBasicBlock *block);
+
+/**
+ * Dump a compilation context.
+ *
+ * @param cc compilation context to be dumped
+ */
+void
+jit_dump_cc(JitCompContext *cc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _JIT_DUMP_H_ */

+ 2248 - 0
core/iwasm/fast-jit/jit_frontend.c

@@ -0,0 +1,2248 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_compiler.h"
+#include "jit_frontend.h"
+#include "fe/jit_emit_compare.h"
+#include "fe/jit_emit_const.h"
+#include "fe/jit_emit_control.h"
+#include "fe/jit_emit_conversion.h"
+#include "fe/jit_emit_exception.h"
+#include "fe/jit_emit_function.h"
+#include "fe/jit_emit_memory.h"
+#include "fe/jit_emit_numberic.h"
+#include "fe/jit_emit_parametric.h"
+#include "fe/jit_emit_table.h"
+#include "fe/jit_emit_variable.h"
+#include "../interpreter/wasm_interp.h"
+#include "../interpreter/wasm_opcode.h"
+#include "../common/wasm_exec_env.h"
+
+/* clang-format off */
+static const char *jit_exception_msgs[] = {
+    "unreachable",                    /* JIT_EXCE_UNREACHABLE */
+    "allocate memory failed",         /* JIT_EXCE_OUT_OF_MEMORY */
+    "out of bounds memory access",    /* JIT_EXCE_OUT_OF_BOUNDS_MEMORY_ACCESS */
+    "integer overflow",               /* JIT_EXCE_INTEGER_OVERFLOW */
+    "integer divide by zero",         /* JIT_EXCE_INTEGER_DIVIDE_BY_ZERO */
+    "invalid conversion to integer",  /* JIT_EXCE_INVALID_CONVERSION_TO_INTEGER */
+    "indirect call type mismatch",    /* JIT_EXCE_INVALID_FUNCTION_TYPE_INDEX */
+    "invalid function index",         /* JIT_EXCE_INVALID_FUNCTION_INDEX */
+    "undefined element",              /* JIT_EXCE_UNDEFINED_ELEMENT */
+    "uninitialized element",          /* JIT_EXCE_UNINITIALIZED_ELEMENT */
+    "failed to call unlinked import function", /* JIT_EXCE_CALL_UNLINKED_IMPORT_FUNC */
+    "native stack overflow",          /* JIT_EXCE_NATIVE_STACK_OVERFLOW */
+    "unaligned atomic",               /* JIT_EXCE_UNALIGNED_ATOMIC */
+    "wasm auxiliary stack overflow",  /* JIT_EXCE_AUX_STACK_OVERFLOW */
+    "wasm auxiliary stack underflow", /* JIT_EXCE_AUX_STACK_UNDERFLOW */
+    "out of bounds table access",     /* JIT_EXCE_OUT_OF_BOUNDS_TABLE_ACCESS */
+    "wasm operand stack overflow",    /* JIT_EXCE_OPERAND_STACK_OVERFLOW */
+    "",                               /* JIT_EXCE_ALREADY_THROWN */
+};
+/* clang-format on */
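+
+/* Note: the order of these messages must match the JIT_EXCE_* exception
+   ids, since jit_set_exception_with_id() indexes this array directly. */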
+
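+/*
+ * The get_*_reg helpers below all follow the same lazy-caching pattern:
+ * the first use emits a load into the fixed virtual register reserved
+ * in the compilation context and records it in the frame; subsequent
+ * uses reuse the cached register until clear_fixed_virtual_regs() (or
+ * the narrower clear_memory_regs() / clear_table_regs()) invalidates
+ * the cache.
+ */
+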
+JitReg
+get_module_inst_reg(JitFrame *frame)
+{
+    JitCompContext *cc = frame->cc;
+
+    if (!frame->module_inst_reg) {
+        frame->module_inst_reg = cc->module_inst_reg;
+        GEN_INSN(LDPTR, frame->module_inst_reg, cc->exec_env_reg,
+                 NEW_CONST(I32, offsetof(WASMExecEnv, module_inst)));
+    }
+    return frame->module_inst_reg;
+}
+
+JitReg
+get_module_reg(JitFrame *frame)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg module_inst_reg = get_module_inst_reg(frame);
+
+    if (!frame->module_reg) {
+        frame->module_reg = cc->module_reg;
+        GEN_INSN(LDPTR, frame->module_reg, module_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMModuleInstance, module)));
+    }
+    return frame->module_reg;
+}
+
+JitReg
+get_fast_jit_func_ptrs_reg(JitFrame *frame)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg module_inst_reg = get_module_inst_reg(frame);
+
+    if (!frame->fast_jit_func_ptrs_reg) {
+        frame->fast_jit_func_ptrs_reg = cc->fast_jit_func_ptrs_reg;
+        GEN_INSN(
+            LDPTR, frame->fast_jit_func_ptrs_reg, module_inst_reg,
+            NEW_CONST(I32, offsetof(WASMModuleInstance, fast_jit_func_ptrs)));
+    }
+    return frame->fast_jit_func_ptrs_reg;
+}
+
+JitReg
+get_global_data_reg(JitFrame *frame)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg module_inst_reg = get_module_inst_reg(frame);
+
+    if (!frame->global_data_reg) {
+        frame->global_data_reg = cc->global_data_reg;
+        GEN_INSN(LDPTR, frame->global_data_reg, module_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMModuleInstance, global_data)));
+    }
+    return frame->global_data_reg;
+}
+
+JitReg
+get_aux_stack_bound_reg(JitFrame *frame)
+{
+    JitCompContext *cc = frame->cc;
+
+    if (!frame->aux_stack_bound_reg) {
+        frame->aux_stack_bound_reg = cc->aux_stack_bound_reg;
+        GEN_INSN(
+            LDI32, frame->aux_stack_bound_reg, cc->exec_env_reg,
+            NEW_CONST(I32, offsetof(WASMExecEnv, aux_stack_boundary.boundary)));
+    }
+    return frame->aux_stack_bound_reg;
+}
+
+JitReg
+get_aux_stack_bottom_reg(JitFrame *frame)
+{
+    JitCompContext *cc = frame->cc;
+
+    if (!frame->aux_stack_bottom_reg) {
+        frame->aux_stack_bottom_reg = cc->aux_stack_bottom_reg;
+        GEN_INSN(
+            LDI32, frame->aux_stack_bottom_reg, cc->exec_env_reg,
+            NEW_CONST(I32, offsetof(WASMExecEnv, aux_stack_bottom.bottom)));
+    }
+    return frame->aux_stack_bottom_reg;
+}
+
+JitReg
+get_memories_reg(JitFrame *frame)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg module_inst_reg = get_module_inst_reg(frame);
+
+    if (!frame->memories_reg) {
+        frame->memories_reg = cc->memories_reg;
+        GEN_INSN(LDPTR, frame->memories_reg, module_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMModuleInstance, memories)));
+    }
+    return frame->memories_reg;
+}
+
+JitReg
+get_memory_inst_reg(JitFrame *frame, uint32 mem_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg memories_reg = get_memories_reg(frame);
+
+    if (!frame->memory_regs[mem_idx].memory_inst) {
+        frame->memory_regs[mem_idx].memory_inst =
+            cc->memory_regs[mem_idx].memory_inst;
+        GEN_INSN(
+            LDPTR, frame->memory_regs[mem_idx].memory_inst, memories_reg,
+            NEW_CONST(I32, (uint32)sizeof(WASMMemoryInstance *) * mem_idx));
+    }
+    return frame->memory_regs[mem_idx].memory_inst;
+}
+
+JitReg
+get_memory_data_reg(JitFrame *frame, uint32 mem_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg memory_inst_reg = get_memory_inst_reg(frame, mem_idx);
+
+    if (!frame->memory_regs[mem_idx].memory_data) {
+        frame->memory_regs[mem_idx].memory_data =
+            cc->memory_regs[mem_idx].memory_data;
+        GEN_INSN(LDPTR, frame->memory_regs[mem_idx].memory_data,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance, memory_data)));
+    }
+    return frame->memory_regs[mem_idx].memory_data;
+}
+
+JitReg
+get_memory_data_end_reg(JitFrame *frame, uint32 mem_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg memory_inst_reg = get_memory_inst_reg(frame, mem_idx);
+
+    if (!frame->memory_regs[mem_idx].memory_data_end) {
+        frame->memory_regs[mem_idx].memory_data_end =
+            cc->memory_regs[mem_idx].memory_data_end;
+        GEN_INSN(LDPTR, frame->memory_regs[mem_idx].memory_data_end,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance, memory_data_end)));
+    }
+    return frame->memory_regs[mem_idx].memory_data_end;
+}
+
+JitReg
+get_mem_bound_check_1byte_reg(JitFrame *frame, uint32 mem_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg memory_inst_reg = get_memory_inst_reg(frame, mem_idx);
+
+    if (!frame->memory_regs[mem_idx].mem_bound_check_1byte) {
+        frame->memory_regs[mem_idx].mem_bound_check_1byte =
+            cc->memory_regs[mem_idx].mem_bound_check_1byte;
+#if UINTPTR_MAX == UINT64_MAX
+        GEN_INSN(LDI64, frame->memory_regs[mem_idx].mem_bound_check_1byte,
+                 memory_inst_reg,
+                 NEW_CONST(
+                     I32, offsetof(WASMMemoryInstance, mem_bound_check_1byte)));
+#else
+        GEN_INSN(LDI32, frame->memory_regs[mem_idx].mem_bound_check_1byte,
+                 memory_inst_reg,
+                 NEW_CONST(
+                     I32, offsetof(WASMMemoryInstance, mem_bound_check_1byte)));
+#endif
+    }
+    return frame->memory_regs[mem_idx].mem_bound_check_1byte;
+}
+
+JitReg
+get_mem_bound_check_2bytes_reg(JitFrame *frame, uint32 mem_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg memory_inst_reg = get_memory_inst_reg(frame, mem_idx);
+
+    if (!frame->memory_regs[mem_idx].mem_bound_check_2bytes) {
+        frame->memory_regs[mem_idx].mem_bound_check_2bytes =
+            cc->memory_regs[mem_idx].mem_bound_check_2bytes;
+#if UINTPTR_MAX == UINT64_MAX
+        GEN_INSN(LDI64, frame->memory_regs[mem_idx].mem_bound_check_2bytes,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance,
+                                         mem_bound_check_2bytes)));
+#else
+        GEN_INSN(LDI32, frame->memory_regs[mem_idx].mem_bound_check_2bytes,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance,
+                                         mem_bound_check_2bytes)));
+#endif
+    }
+    return frame->memory_regs[mem_idx].mem_bound_check_2bytes;
+}
+
+JitReg
+get_mem_bound_check_4bytes_reg(JitFrame *frame, uint32 mem_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg memory_inst_reg = get_memory_inst_reg(frame, mem_idx);
+
+    if (!frame->memory_regs[mem_idx].mem_bound_check_4bytes) {
+        frame->memory_regs[mem_idx].mem_bound_check_4bytes =
+            cc->memory_regs[mem_idx].mem_bound_check_4bytes;
+#if UINTPTR_MAX == UINT64_MAX
+        GEN_INSN(LDI64, frame->memory_regs[mem_idx].mem_bound_check_4bytes,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance,
+                                         mem_bound_check_4bytes)));
+#else
+        GEN_INSN(LDI32, frame->memory_regs[mem_idx].mem_bound_check_4bytes,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance,
+                                         mem_bound_check_4bytes)));
+#endif
+    }
+    return frame->memory_regs[mem_idx].mem_bound_check_4bytes;
+}
+
+JitReg
+get_mem_bound_check_8bytes_reg(JitFrame *frame, uint32 mem_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg memory_inst_reg = get_memory_inst_reg(frame, mem_idx);
+
+    if (!frame->memory_regs[mem_idx].mem_bound_check_8bytes) {
+        frame->memory_regs[mem_idx].mem_bound_check_8bytes =
+            cc->memory_regs[mem_idx].mem_bound_check_8bytes;
+#if UINTPTR_MAX == UINT64_MAX
+        GEN_INSN(LDI64, frame->memory_regs[mem_idx].mem_bound_check_8bytes,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance,
+                                         mem_bound_check_8bytes)));
+#else
+        GEN_INSN(LDI32, frame->memory_regs[mem_idx].mem_bound_check_8bytes,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance,
+                                         mem_bound_check_8bytes)));
+#endif
+    }
+    return frame->memory_regs[mem_idx].mem_bound_check_8bytes;
+}
+
+JitReg
+get_mem_bound_check_16bytes_reg(JitFrame *frame, uint32 mem_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg memory_inst_reg = get_memory_inst_reg(frame, mem_idx);
+
+    if (!frame->memory_regs[mem_idx].mem_bound_check_16bytes) {
+        frame->memory_regs[mem_idx].mem_bound_check_16bytes =
+            cc->memory_regs[mem_idx].mem_bound_check_16bytes;
+#if UINTPTR_MAX == UINT64_MAX
+        GEN_INSN(LDI64, frame->memory_regs[mem_idx].mem_bound_check_16bytes,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance,
+                                         mem_bound_check_16bytes)));
+#else
+        GEN_INSN(LDI32, frame->memory_regs[mem_idx].mem_bound_check_16bytes,
+                 memory_inst_reg,
+                 NEW_CONST(I32, offsetof(WASMMemoryInstance,
+                                         mem_bound_check_16bytes)));
+#endif
+    }
+    return frame->memory_regs[mem_idx].mem_bound_check_16bytes;
+}
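+
+/*
+ * The mem_bound_check_* registers above cache per-access-size check
+ * boundaries loaded from the memory instance, so an N-byte load/store
+ * can be bounds-checked with a single compare against a precomputed
+ * limit (conceptually the last address at which an N-byte access still
+ * fits) instead of recomputing that limit at every access.
+ */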
+
+JitReg
+get_tables_reg(JitFrame *frame)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg inst_reg = get_module_inst_reg(frame);
+
+    if (!frame->tables_reg) {
+        frame->tables_reg = cc->tables_reg;
+        GEN_INSN(LDPTR, frame->tables_reg, inst_reg,
+                 NEW_CONST(I32, offsetof(WASMModuleInstance, tables)));
+    }
+    return frame->tables_reg;
+}
+
+JitReg
+get_table_inst_reg(JitFrame *frame, uint32 tbl_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg tables_reg = get_tables_reg(frame);
+
+    if (!frame->table_regs[tbl_idx].table_inst) {
+        frame->table_regs[tbl_idx].table_inst =
+            cc->table_regs[tbl_idx].table_inst;
+        GEN_INSN(LDPTR, frame->table_regs[tbl_idx].table_inst, tables_reg,
+                 NEW_CONST(I32, sizeof(WASMTableInstance *) * tbl_idx));
+    }
+    return frame->table_regs[tbl_idx].table_inst;
+}
+
+JitReg
+get_table_data_reg(JitFrame *frame, uint32 tbl_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg table_reg = get_table_inst_reg(frame, tbl_idx);
+
+    if (!frame->table_regs[tbl_idx].table_data) {
+        frame->table_regs[tbl_idx].table_data =
+            cc->table_regs[tbl_idx].table_data;
+        GEN_INSN(ADD, frame->table_regs[tbl_idx].table_data, table_reg,
+                 NEW_CONST(I64, offsetof(WASMTableInstance, base_addr)));
+    }
+    return frame->table_regs[tbl_idx].table_data;
+}
+
+JitReg
+get_table_cur_size_reg(JitFrame *frame, uint32 tbl_idx)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg table_reg = get_table_inst_reg(frame, tbl_idx);
+
+    if (!frame->table_regs[tbl_idx].table_cur_size) {
+        frame->table_regs[tbl_idx].table_cur_size =
+            cc->table_regs[tbl_idx].table_cur_size;
+        GEN_INSN(LDI32, frame->table_regs[tbl_idx].table_cur_size, table_reg,
+                 NEW_CONST(I32, offsetof(WASMTableInstance, cur_size)));
+    }
+    return frame->table_regs[tbl_idx].table_cur_size;
+}
+
+void
+clear_fixed_virtual_regs(JitFrame *frame)
+{
+    WASMModule *module = frame->cc->cur_wasm_module;
+    uint32 count, i;
+
+    frame->module_inst_reg = 0;
+    frame->module_reg = 0;
+    frame->fast_jit_func_ptrs_reg = 0;
+    frame->global_data_reg = 0;
+    frame->aux_stack_bound_reg = 0;
+    frame->aux_stack_bottom_reg = 0;
+    frame->memories_reg = 0;
+    frame->tables_reg = 0;
+
+    count = module->import_memory_count + module->memory_count;
+    for (i = 0; i < count; i++) {
+        frame->memory_regs[i].memory_inst = 0;
+        frame->memory_regs[i].memory_data = 0;
+        frame->memory_regs[i].memory_data_end = 0;
+        frame->memory_regs[i].mem_bound_check_1byte = 0;
+        frame->memory_regs[i].mem_bound_check_2bytes = 0;
+        frame->memory_regs[i].mem_bound_check_4bytes = 0;
+        frame->memory_regs[i].mem_bound_check_8bytes = 0;
+        frame->memory_regs[i].mem_bound_check_16bytes = 0;
+    }
+
+    count = module->import_table_count + module->table_count;
+    for (i = 0; i < count; i++) {
+        frame->table_regs[i].table_inst = 0;
+        frame->table_regs[i].table_data = 0;
+        frame->table_regs[i].table_cur_size = 0;
+    }
+}
+
+void
+clear_memory_regs(JitFrame *frame)
+{
+    WASMModule *module = frame->cc->cur_wasm_module;
+    uint32 count, i;
+
+    count = module->import_memory_count + module->memory_count;
+    for (i = 0; i < count; i++) {
+        frame->memory_regs[i].memory_data = 0;
+        frame->memory_regs[i].memory_data_end = 0;
+        frame->memory_regs[i].mem_bound_check_1byte = 0;
+        frame->memory_regs[i].mem_bound_check_2bytes = 0;
+        frame->memory_regs[i].mem_bound_check_4bytes = 0;
+        frame->memory_regs[i].mem_bound_check_8bytes = 0;
+        frame->memory_regs[i].mem_bound_check_16bytes = 0;
+    }
+}
+
+void
+clear_table_regs(JitFrame *frame)
+{
+    WASMModule *module = frame->cc->cur_wasm_module;
+    uint32 count, i;
+
+    count = module->import_table_count + module->table_count;
+    for (i = 0; i < count; i++) {
+        frame->table_regs[i].table_cur_size = 0;
+    }
+}
+
+JitReg
+gen_load_i32(JitFrame *frame, unsigned n)
+{
+    if (!frame->lp[n].reg) {
+        JitCompContext *cc = frame->cc;
+        frame->lp[n].reg = jit_cc_new_reg_I32(cc);
+        GEN_INSN(LDI32, frame->lp[n].reg, cc->fp_reg,
+                 NEW_CONST(I32, offset_of_local(n)));
+    }
+
+    return frame->lp[n].reg;
+}
+
+JitReg
+gen_load_i64(JitFrame *frame, unsigned n)
+{
+    if (!frame->lp[n].reg) {
+        JitCompContext *cc = frame->cc;
+        frame->lp[n].reg = frame->lp[n + 1].reg = jit_cc_new_reg_I64(cc);
+        GEN_INSN(LDI64, frame->lp[n].reg, cc->fp_reg,
+                 NEW_CONST(I32, offset_of_local(n)));
+    }
+
+    return frame->lp[n].reg;
+}
+
+JitReg
+gen_load_f32(JitFrame *frame, unsigned n)
+{
+    if (!frame->lp[n].reg) {
+        JitCompContext *cc = frame->cc;
+        frame->lp[n].reg = jit_cc_new_reg_F32(cc);
+        GEN_INSN(LDF32, frame->lp[n].reg, cc->fp_reg,
+                 NEW_CONST(I32, offset_of_local(n)));
+    }
+
+    return frame->lp[n].reg;
+}
+
+JitReg
+gen_load_f64(JitFrame *frame, unsigned n)
+{
+    if (!frame->lp[n].reg) {
+        JitCompContext *cc = frame->cc;
+        frame->lp[n].reg = frame->lp[n + 1].reg = jit_cc_new_reg_F64(cc);
+        GEN_INSN(LDF64, frame->lp[n].reg, cc->fp_reg,
+                 NEW_CONST(I32, offset_of_local(n)));
+    }
+
+    return frame->lp[n].reg;
+}
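+
+/*
+ * Note: 64-bit values occupy two 4-byte cells in the frame, which is
+ * why gen_load_i64/gen_load_f64 record the same register in lp[n] and
+ * lp[n + 1], and why gen_commit_values below clears the dirty bit of
+ * the second cell after storing a 64-bit register.
+ */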
+
+void
+gen_commit_values(JitFrame *frame, JitValueSlot *begin, JitValueSlot *end)
+{
+    JitCompContext *cc = frame->cc;
+    JitValueSlot *p;
+    int n;
+
+    for (p = begin; p < end; p++) {
+        if (!p->dirty)
+            continue;
+
+        p->dirty = 0;
+        n = p - frame->lp;
+
+        switch (jit_reg_kind(p->reg)) {
+            case JIT_REG_KIND_I32:
+                GEN_INSN(STI32, p->reg, cc->fp_reg,
+                         NEW_CONST(I32, offset_of_local(n)));
+                break;
+
+            case JIT_REG_KIND_I64:
+                GEN_INSN(STI64, p->reg, cc->fp_reg,
+                         NEW_CONST(I32, offset_of_local(n)));
+                (++p)->dirty = 0;
+                break;
+
+            case JIT_REG_KIND_F32:
+                GEN_INSN(STF32, p->reg, cc->fp_reg,
+                         NEW_CONST(I32, offset_of_local(n)));
+                break;
+
+            case JIT_REG_KIND_F64:
+                GEN_INSN(STF64, p->reg, cc->fp_reg,
+                         NEW_CONST(I32, offset_of_local(n)));
+                (++p)->dirty = 0;
+                break;
+        }
+    }
+}
+
+/**
+ * Generate instructions to commit SP and IP pointers to the frame.
+ *
+ * @param frame the frame information
+ */
+void
+gen_commit_sp_ip(JitFrame *frame)
+{
+    JitCompContext *cc = frame->cc;
+    JitReg sp;
+
+    if (frame->sp != frame->committed_sp) {
+        sp = jit_cc_new_reg_ptr(cc);
+        GEN_INSN(ADD, sp, cc->fp_reg,
+                 NEW_CONST(PTR, offset_of_local(frame->sp - frame->lp)));
+        GEN_INSN(STPTR, sp, cc->fp_reg,
+                 NEW_CONST(I32, offsetof(WASMInterpFrame, sp)));
+        frame->committed_sp = frame->sp;
+    }
+
+#if 0 /* Disable committing ip currently */
+    if (frame->ip != frame->committed_ip) {
+        GEN_INSN(STPTR, NEW_CONST(PTR, (uintptr_t)frame->ip), cc->fp_reg,
+                 NEW_CONST(I32, offsetof(WASMInterpFrame, ip)));
+        frame->committed_ip = frame->ip;
+    }
+#endif
+}
+
+static void
+jit_set_exception_with_id(WASMModuleInstance *module_inst, uint32 id)
+{
+    if (id < JIT_EXCE_NUM)
+        wasm_set_exception(module_inst, jit_exception_msgs[id]);
+    else
+        wasm_set_exception(module_inst, "unknown exception");
+}
+
+static bool
+create_fixed_virtual_regs(JitCompContext *cc)
+{
+    WASMModule *module = cc->cur_wasm_module;
+    uint64 total_size;
+    uint32 i, count;
+
+    cc->module_inst_reg = jit_cc_new_reg_ptr(cc);
+    cc->module_reg = jit_cc_new_reg_ptr(cc);
+    cc->fast_jit_func_ptrs_reg = jit_cc_new_reg_ptr(cc);
+    cc->global_data_reg = jit_cc_new_reg_ptr(cc);
+    cc->aux_stack_bound_reg = jit_cc_new_reg_I32(cc);
+    cc->aux_stack_bottom_reg = jit_cc_new_reg_I32(cc);
+    cc->memories_reg = jit_cc_new_reg_ptr(cc);
+    cc->tables_reg = jit_cc_new_reg_ptr(cc);
+
+    count = module->import_memory_count + module->memory_count;
+    if (count > 0) {
+        total_size = (uint64)sizeof(JitMemRegs) * count;
+        if (total_size > UINT32_MAX
+            || !(cc->memory_regs = jit_calloc((uint32)total_size))) {
+            jit_set_last_error(cc, "allocate memory failed");
+            return false;
+        }
+
+        for (i = 0; i < count; i++) {
+            cc->memory_regs[i].memory_inst = jit_cc_new_reg_ptr(cc);
+            cc->memory_regs[i].memory_data = jit_cc_new_reg_ptr(cc);
+            cc->memory_regs[i].memory_data_end = jit_cc_new_reg_ptr(cc);
+            cc->memory_regs[i].mem_bound_check_1byte = jit_cc_new_reg_ptr(cc);
+            cc->memory_regs[i].mem_bound_check_2bytes = jit_cc_new_reg_ptr(cc);
+            cc->memory_regs[i].mem_bound_check_4bytes = jit_cc_new_reg_ptr(cc);
+            cc->memory_regs[i].mem_bound_check_8bytes = jit_cc_new_reg_ptr(cc);
+            cc->memory_regs[i].mem_bound_check_16bytes = jit_cc_new_reg_ptr(cc);
+        }
+    }
+
+    count = module->import_table_count + module->table_count;
+    if (count > 0) {
+        total_size = (uint64)sizeof(JitTableRegs) * count;
+        if (total_size > UINT32_MAX
+            || !(cc->table_regs = jit_calloc((uint32)total_size))) {
+            jit_set_last_error(cc, "allocate memory failed");
+            return false;
+        }
+
+        for (i = 0; i < count; i++) {
+            cc->table_regs[i].table_inst = jit_cc_new_reg_ptr(cc);
+            cc->table_regs[i].table_data = jit_cc_new_reg_ptr(cc);
+            cc->table_regs[i].table_cur_size = jit_cc_new_reg_I32(cc);
+        }
+    }
+
+    return true;
+}
+
+static bool
+form_and_translate_func(JitCompContext *cc)
+{
+    JitBasicBlock *func_entry_basic_block;
+    JitReg func_entry_label;
+    JitInsn *insn;
+    JitIncomingInsn *incoming_insn, *incoming_insn_next;
+    uint32 i;
+
+    if (!create_fixed_virtual_regs(cc))
+        return false;
+
+    if (!(func_entry_basic_block = jit_frontend_translate_func(cc)))
+        return false;
+
+    jit_cc_reset_insn_hash(cc);
+
+    /* The label of the func entry basic block. */
+    func_entry_label = jit_basic_block_label(func_entry_basic_block);
+
+    /* Create a JMP instruction jumping to the func entry. */
+    if (!(insn = jit_cc_new_insn(cc, JMP, func_entry_label)))
+        return false;
+
+    /* Insert the instruction into the cc entry block. */
+    jit_basic_block_append_insn(jit_cc_entry_basic_block(cc), insn);
+
+    /* Patch INSNs jumping to exception basic blocks. */
+    for (i = 0; i < JIT_EXCE_NUM; i++) {
+        incoming_insn = cc->incoming_insns_for_exec_bbs[i];
+        if (incoming_insn) {
+            if (!(cc->exce_basic_blocks[i] = jit_cc_new_basic_block(cc, 0))) {
+                jit_set_last_error(cc, "create basic block failed");
+                return false;
+            }
+            while (incoming_insn) {
+                incoming_insn_next = incoming_insn->next;
+                insn = incoming_insn->insn;
+                if (insn->opcode == JIT_OP_JMP) {
+                    *(jit_insn_opnd(insn, 0)) =
+                        jit_basic_block_label(cc->exce_basic_blocks[i]);
+                }
+                else if (insn->opcode >= JIT_OP_BEQ
+                         && insn->opcode <= JIT_OP_BLEU) {
+                    *(jit_insn_opnd(insn, 1)) =
+                        jit_basic_block_label(cc->exce_basic_blocks[i]);
+                }
+                incoming_insn = incoming_insn_next;
+            }
+            cc->cur_basic_block = cc->exce_basic_blocks[i];
+            if (i != JIT_EXCE_ALREADY_THROWN) {
+                JitReg module_inst_reg = jit_cc_new_reg_ptr(cc);
+                GEN_INSN(LDPTR, module_inst_reg, cc->exec_env_reg,
+                         NEW_CONST(I32, offsetof(WASMExecEnv, module_inst)));
+                insn = GEN_INSN(
+                    CALLNATIVE, 0,
+                    NEW_CONST(PTR, (uintptr_t)jit_set_exception_with_id), 2);
+                if (insn) {
+                    *(jit_insn_opndv(insn, 2)) = module_inst_reg;
+                    *(jit_insn_opndv(insn, 3)) = NEW_CONST(I32, i);
+                }
+            }
+            GEN_INSN(RETURN, NEW_CONST(I32, JIT_INTERP_ACTION_THROWN));
+
+            *(jit_annl_begin_bcip(cc,
+                                  jit_basic_block_label(cc->cur_basic_block))) =
+                *(jit_annl_end_bcip(
+                    cc, jit_basic_block_label(cc->cur_basic_block))) =
+                    cc->cur_wasm_module->load_addr;
+        }
+    }
+
+    *(jit_annl_begin_bcip(cc, cc->entry_label)) =
+        *(jit_annl_end_bcip(cc, cc->entry_label)) =
+            *(jit_annl_begin_bcip(cc, cc->exit_label)) =
+                *(jit_annl_end_bcip(cc, cc->exit_label)) =
+                    cc->cur_wasm_module->load_addr;
+
+    return true;
+}
+
+bool
+jit_pass_frontend(JitCompContext *cc)
+{
+    /* Enable necessary annotations required at the current stage. */
+    if (!jit_annl_enable_begin_bcip(cc) || !jit_annl_enable_end_bcip(cc)
+        || !jit_annl_enable_end_sp(cc) || !jit_annr_enable_def_insn(cc)
+        || !jit_cc_enable_insn_hash(cc, 127))
+        return false;
+
+    if (!(form_and_translate_func(cc)))
+        return false;
+
+    /* Release the annotations after local CSE and translation. */
+    jit_cc_disable_insn_hash(cc);
+    jit_annl_disable_end_sp(cc);
+
+    return true;
+}
+
+static JitFrame *
+init_func_translation(JitCompContext *cc)
+{
+    JitFrame *jit_frame;
+    JitReg top, top_boundary, new_top, frame_boundary, frame_sp;
+    WASMModule *cur_wasm_module = cc->cur_wasm_module;
+    WASMFunction *cur_wasm_func = cc->cur_wasm_func;
+    uint32 cur_wasm_func_idx = cc->cur_wasm_func_idx;
+    uint32 max_locals =
+        cur_wasm_func->param_cell_num + cur_wasm_func->local_cell_num;
+    uint32 max_stacks = cur_wasm_func->max_stack_cell_num;
+    uint64 total_cell_num =
+        (uint64)cur_wasm_func->param_cell_num
+        + (uint64)cur_wasm_func->local_cell_num
+        + (uint64)cur_wasm_func->max_stack_cell_num
+        + ((uint64)cur_wasm_func->max_block_num) * sizeof(WASMBranchBlock) / 4;
+    uint32 frame_size, outs_size, local_size, count;
+    uint32 i, local_off;
+    uint64 total_size;
+
+    if ((uint64)max_locals + (uint64)max_stacks >= UINT32_MAX
+        || total_cell_num >= UINT32_MAX
+        || !(jit_frame = jit_calloc(offsetof(JitFrame, lp)
+                                    + sizeof(*jit_frame->lp)
+                                          * (max_locals + max_stacks)))) {
+        os_printf("allocate jit frame failed\n");
+        return NULL;
+    }
+
+    count =
+        cur_wasm_module->import_memory_count + cur_wasm_module->memory_count;
+    if (count > 0) {
+        total_size = (uint64)sizeof(JitMemRegs) * count;
+        if (total_size > UINT32_MAX
+            || !(jit_frame->memory_regs = jit_calloc((uint32)total_size))) {
+            jit_set_last_error(cc, "allocate memory failed");
+            jit_free(jit_frame);
+            return NULL;
+        }
+    }
+
+    count = cur_wasm_module->import_table_count + cur_wasm_module->table_count;
+    if (count > 0) {
+        total_size = (uint64)sizeof(JitTableRegs) * count;
+        if (total_size > UINT32_MAX
+            || !(jit_frame->table_regs = jit_calloc((uint32)total_size))) {
+            jit_set_last_error(cc, "allocate memory failed");
+            if (jit_frame->memory_regs)
+                jit_free(jit_frame->memory_regs);
+            jit_free(jit_frame);
+            return NULL;
+        }
+    }
+
+    jit_frame->cur_wasm_module = cur_wasm_module;
+    jit_frame->cur_wasm_func = cur_wasm_func;
+    jit_frame->cur_wasm_func_idx = cur_wasm_func_idx;
+    jit_frame->cc = cc;
+    jit_frame->max_locals = max_locals;
+    jit_frame->max_stacks = max_stacks;
+    jit_frame->sp = jit_frame->lp + max_locals;
+    jit_frame->ip = cur_wasm_func->code;
+
+    cc->jit_frame = jit_frame;
+    cc->cur_basic_block = jit_cc_entry_basic_block(cc);
+    cc->spill_cache_offset = wasm_interp_interp_frame_size(total_cell_num);
+    /* Set spill cache size according to max local cell num, max stack cell
+       num and virtual fixed register num */
+    cc->spill_cache_size = (max_locals + max_stacks) * 4 + sizeof(void *) * 4;
+    cc->total_frame_size = cc->spill_cache_offset + cc->spill_cache_size;
+    cc->jitted_return_address_offset =
+        offsetof(WASMInterpFrame, jitted_return_addr);
+
+    frame_size = outs_size = cc->total_frame_size;
+    local_size =
+        (cur_wasm_func->param_cell_num + cur_wasm_func->local_cell_num) * 4;
+
+    top = jit_cc_new_reg_ptr(cc);
+    top_boundary = jit_cc_new_reg_ptr(cc);
+    new_top = jit_cc_new_reg_ptr(cc);
+    frame_boundary = jit_cc_new_reg_ptr(cc);
+    frame_sp = jit_cc_new_reg_ptr(cc);
+
+    /* top = exec_env->wasm_stack.s.top */
+    GEN_INSN(LDPTR, top, cc->exec_env_reg,
+             NEW_CONST(I32, offsetof(WASMExecEnv, wasm_stack.s.top)));
+    /* top_boundary = exec_env->wasm_stack.s.top_boundary */
+    GEN_INSN(LDPTR, top_boundary, cc->exec_env_reg,
+             NEW_CONST(I32, offsetof(WASMExecEnv, wasm_stack.s.top_boundary)));
+    /* frame_boundary = top + frame_size + outs_size */
+    GEN_INSN(ADD, frame_boundary, top, NEW_CONST(PTR, frame_size + outs_size));
+    /* if frame_boundary > top_boundary, throw stack overflow exception */
+    GEN_INSN(CMP, cc->cmp_reg, frame_boundary, top_boundary);
+    if (!jit_emit_exception(cc, JIT_EXCE_OPERAND_STACK_OVERFLOW, JIT_OP_BGTU,
+                            cc->cmp_reg, NULL)) {
+        return NULL;
+    }
+
+    /* Add first and then subtract, to save one register */
+    /* new_top = frame_boundary - outs_size = top + frame_size */
+    GEN_INSN(SUB, new_top, frame_boundary, NEW_CONST(PTR, outs_size));
+    /* exec_env->wasm_stack.s.top = new_top */
+    GEN_INSN(STPTR, new_top, cc->exec_env_reg,
+             NEW_CONST(I32, offsetof(WASMExecEnv, wasm_stack.s.top)));
+    /* frame_sp = frame->lp + local_size */
+    GEN_INSN(ADD, frame_sp, top,
+             NEW_CONST(PTR, offsetof(WASMInterpFrame, lp) + local_size));
+    /* frame->sp = frame_sp */
+    GEN_INSN(STPTR, frame_sp, top,
+             NEW_CONST(I32, offsetof(WASMInterpFrame, sp)));
+    /* frame->prev_frame = fp_reg */
+    GEN_INSN(STPTR, cc->fp_reg, top,
+             NEW_CONST(I32, offsetof(WASMInterpFrame, prev_frame)));
+    /* TODO: do we need to set frame->function? */
+    /*
+    GEN_INSN(STPTR, func_inst, top,
+             NEW_CONST(I32, offsetof(WASMInterpFrame, function)));
+    */
+    /* exec_env->cur_frame = top */
+    GEN_INSN(STPTR, top, cc->exec_env_reg,
+             NEW_CONST(I32, offsetof(WASMExecEnv, cur_frame)));
+    /* fp_reg = top */
+    GEN_INSN(MOV, cc->fp_reg, top);
+
+    /* Zero-initialize the local variables */
+    local_off = (uint32)offsetof(WASMInterpFrame, lp)
+                + cur_wasm_func->param_cell_num * 4;
+    for (i = 0; i < cur_wasm_func->local_cell_num / 2; i++, local_off += 8) {
+        GEN_INSN(STI64, NEW_CONST(I64, 0), cc->fp_reg,
+                 NEW_CONST(I32, local_off));
+    }
+    if (cur_wasm_func->local_cell_num & 1) {
+        GEN_INSN(STI32, NEW_CONST(I32, 0), cc->fp_reg,
+                 NEW_CONST(I32, local_off));
+    }
+
+    return jit_frame;
+}
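+
+/*
+ * Rough sketch of the stack area claimed by the prologue above,
+ * assuming the layout implied by the offsets used (lower addresses
+ * first):
+ *
+ *   top .. top + spill_cache_offset:   WASMInterpFrame header,
+ *                                      lp (params + locals),
+ *                                      operand stack, branch blocks
+ *   .. + spill_cache_size:             spill cache for the JIT
+ *   top + frame_size .. + outs_size:   checked (but not claimed) space
+ *                                      for the callee's frame
+ */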
+
+static void
+free_block_memory(JitBlock *block)
+{
+    if (block->param_types)
+        jit_free(block->param_types);
+    if (block->result_types)
+        jit_free(block->result_types);
+    jit_free(block);
+}
+
+static JitBasicBlock *
+create_func_block(JitCompContext *cc)
+{
+    JitBlock *jit_block;
+    WASMFunction *cur_func = cc->cur_wasm_func;
+    WASMType *func_type = cur_func->func_type;
+    uint32 param_count = func_type->param_count;
+    uint32 result_count = func_type->result_count;
+
+    if (!(jit_block = jit_calloc(sizeof(JitBlock)))) {
+        return NULL;
+    }
+
+    if (param_count && !(jit_block->param_types = jit_calloc(param_count))) {
+        goto fail;
+    }
+    if (result_count && !(jit_block->result_types = jit_calloc(result_count))) {
+        goto fail;
+    }
+
+    /* Set block data */
+    jit_block->label_type = LABEL_TYPE_FUNCTION;
+    jit_block->param_count = param_count;
+    if (param_count) {
+        bh_memcpy_s(jit_block->param_types, param_count, func_type->types,
+                    param_count);
+    }
+    jit_block->result_count = result_count;
+    if (result_count) {
+        bh_memcpy_s(jit_block->result_types, result_count,
+                    func_type->types + param_count, result_count);
+    }
+    jit_block->wasm_code_end = cur_func->code + cur_func->code_size;
+    jit_block->frame_sp_begin = cc->jit_frame->sp;
+
+    /* Add function entry block */
+    if (!(jit_block->basic_block_entry = jit_cc_new_basic_block(cc, 0))) {
+        goto fail;
+    }
+    *(jit_annl_begin_bcip(
+        cc, jit_basic_block_label(jit_block->basic_block_entry))) =
+        cur_func->code;
+    jit_block_stack_push(&cc->block_stack, jit_block);
+    cc->cur_basic_block = jit_block->basic_block_entry;
+
+    return jit_block->basic_block_entry;
+
+fail:
+    free_block_memory(jit_block);
+    return NULL;
+}
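+
+/*
+ * The JitBlock created above models wasm's implicit function-level
+ * block: its results are the function's results and its end is the end
+ * of the function body, so WASM_OP_END handling can treat the function
+ * epilogue like an ordinary block end.
+ */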
+
+#define CHECK_BUF(buf, buf_end, length)                                 \
+    do {                                                                \
+        if (buf + length > buf_end) {                                   \
+            jit_set_last_error(cc, "read leb failed: unexpected end."); \
+            return false;                                               \
+        }                                                               \
+    } while (0)
+
+static bool
+read_leb(JitCompContext *cc, const uint8 *buf, const uint8 *buf_end,
+         uint32 *p_offset, uint32 maxbits, bool sign, uint64 *p_result)
+{
+    uint64 result = 0;
+    uint32 shift = 0;
+    uint32 bcnt = 0;
+    uint64 byte;
+
+    while (true) {
+        CHECK_BUF(buf + *p_offset, buf_end, 1);
+        byte = buf[*p_offset];
+        *p_offset += 1;
+        result |= ((byte & 0x7f) << shift);
+        shift += 7;
+        /* Count every byte read so that over-long encodings are
+           rejected by the check below. */
+        bcnt += 1;
+        if ((byte & 0x80) == 0) {
+            break;
+        }
+    }
+    if (bcnt > (maxbits + 6) / 7) {
+        jit_set_last_error(cc, "read leb failed: "
+                               "integer representation too long");
+        return false;
+    }
+    if (sign && (shift < maxbits) && (byte & 0x40)) {
+        /* Sign extend */
+        result |= (~((uint64)0)) << shift;
+    }
+    *p_result = result;
+    return true;
+}
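+
+/*
+ * Worked LEB128 example for reference: the unsigned value 624485
+ * (0x98765) is encoded as the byte sequence 0xE5 0x8E 0x26. Each byte
+ * contributes 7 payload bits, least-significant group first, and the
+ * 0x80 bit marks continuation:
+ *
+ *   0x65 | (0x0E << 7) | (0x26 << 14) = 101 + 1792 + 622592 = 624485
+ */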
+
+#define read_leb_uint32(p, p_end, res)                        \
+    do {                                                      \
+        uint32 off = 0;                                       \
+        uint64 res64;                                         \
+        if (!read_leb(cc, p, p_end, &off, 32, false, &res64)) \
+            return false;                                     \
+        p += off;                                             \
+        res = (uint32)res64;                                  \
+    } while (0)
+
+#define read_leb_int32(p, p_end, res)                        \
+    do {                                                     \
+        uint32 off = 0;                                      \
+        uint64 res64;                                        \
+        if (!read_leb(cc, p, p_end, &off, 32, true, &res64)) \
+            return false;                                    \
+        p += off;                                            \
+        res = (int32)res64;                                  \
+    } while (0)
+
+#define read_leb_int64(p, p_end, res)                        \
+    do {                                                     \
+        uint32 off = 0;                                      \
+        uint64 res64;                                        \
+        if (!read_leb(cc, p, p_end, &off, 64, true, &res64)) \
+            return false;                                    \
+        p += off;                                            \
+        res = (int64)res64;                                  \
+    } while (0)
+
+static bool
+jit_compile_func(JitCompContext *cc)
+{
+    WASMFunction *cur_func = cc->cur_wasm_func;
+    WASMType *func_type = NULL;
+    uint8 *frame_ip = cur_func->code, opcode, *p_f32, *p_f64;
+    uint8 *frame_ip_end = frame_ip + cur_func->code_size;
+    uint8 *param_types = NULL, *result_types = NULL, value_type;
+    uint16 param_count, result_count;
+    uint32 br_depth, *br_depths, br_count;
+    uint32 func_idx, type_idx, mem_idx, local_idx, global_idx, i;
+    uint32 bytes = 4, align, offset;
+    bool merge_cmp_and_if = false, merge_cmp_and_br_if = false;
+    bool sign = true;
+    int32 i32_const;
+    int64 i64_const;
+    float32 f32_const;
+    float64 f64_const;
+
+    while (frame_ip < frame_ip_end) {
+        cc->jit_frame->ip = frame_ip;
+        opcode = *frame_ip++;
+
+#if 0 /* TODO */
+#if WASM_ENABLE_THREAD_MGR != 0
+        /* Insert suspend check point */
+        if (cc->enable_thread_mgr) {
+            if (!check_suspend_flags(cc, func_ctx))
+                return false;
+        }
+#endif
+#endif
+
+        switch (opcode) {
+            case WASM_OP_UNREACHABLE:
+                if (!jit_compile_op_unreachable(cc, &frame_ip))
+                    return false;
+                break;
+
+            case WASM_OP_NOP:
+                break;
+
+            case WASM_OP_BLOCK:
+            case WASM_OP_LOOP:
+            case WASM_OP_IF:
+            {
+                value_type = *frame_ip++;
+                if (value_type == VALUE_TYPE_I32 || value_type == VALUE_TYPE_I64
+                    || value_type == VALUE_TYPE_F32
+                    || value_type == VALUE_TYPE_F64
+                    || value_type == VALUE_TYPE_V128
+                    || value_type == VALUE_TYPE_VOID
+                    || value_type == VALUE_TYPE_FUNCREF
+                    || value_type == VALUE_TYPE_EXTERNREF) {
+                    param_count = 0;
+                    param_types = NULL;
+                    if (value_type == VALUE_TYPE_VOID) {
+                        result_count = 0;
+                        result_types = NULL;
+                    }
+                    else {
+                        result_count = 1;
+                        result_types = &value_type;
+                    }
+                }
+                else {
+                    jit_set_last_error(cc, "unsupported value type");
+                    return false;
+                }
+                if (!jit_compile_op_block(
+                        cc, &frame_ip, frame_ip_end,
+                        (uint32)(LABEL_TYPE_BLOCK + opcode - WASM_OP_BLOCK),
+                        param_count, param_types, result_count, result_types,
+                        merge_cmp_and_if))
+                    return false;
+                /* Clear flag */
+                merge_cmp_and_if = false;
+                break;
+            }
+            case EXT_OP_BLOCK:
+            case EXT_OP_LOOP:
+            case EXT_OP_IF:
+            {
+                read_leb_uint32(frame_ip, frame_ip_end, type_idx);
+                func_type = cc->cur_wasm_module->types[type_idx];
+                param_count = func_type->param_count;
+                param_types = func_type->types;
+                result_count = func_type->result_count;
+                result_types = func_type->types + param_count;
+                if (!jit_compile_op_block(
+                        cc, &frame_ip, frame_ip_end,
+                        (uint32)(LABEL_TYPE_BLOCK + opcode - EXT_OP_BLOCK),
+                        param_count, param_types, result_count, result_types,
+                        merge_cmp_and_if))
+                    return false;
+                /* Clear flag */
+                merge_cmp_and_if = false;
+                break;
+            }
+
+            case WASM_OP_ELSE:
+                if (!jit_compile_op_else(cc, &frame_ip))
+                    return false;
+                break;
+
+            case WASM_OP_END:
+                if (!jit_compile_op_end(cc, &frame_ip))
+                    return false;
+                break;
+
+            case WASM_OP_BR:
+                read_leb_uint32(frame_ip, frame_ip_end, br_depth);
+                if (!jit_compile_op_br(cc, br_depth, &frame_ip))
+                    return false;
+                break;
+
+            case WASM_OP_BR_IF:
+                read_leb_uint32(frame_ip, frame_ip_end, br_depth);
+                if (!jit_compile_op_br_if(cc, br_depth, merge_cmp_and_br_if,
+                                          &frame_ip))
+                    return false;
+                /* Clear flag */
+                merge_cmp_and_br_if = false;
+                break;
+
+            case WASM_OP_BR_TABLE:
+                read_leb_uint32(frame_ip, frame_ip_end, br_count);
+                if (!(br_depths = jit_calloc((uint32)sizeof(uint32)
+                                             * (br_count + 1)))) {
+                    jit_set_last_error(cc, "allocate memory failed.");
+                    goto fail;
+                }
+#if WASM_ENABLE_FAST_INTERP != 0
+                for (i = 0; i <= br_count; i++)
+                    read_leb_uint32(frame_ip, frame_ip_end, br_depths[i]);
+#else
+                for (i = 0; i <= br_count; i++)
+                    br_depths[i] = *frame_ip++;
+#endif
+
+                if (!jit_compile_op_br_table(cc, br_depths, br_count,
+                                             &frame_ip)) {
+                    jit_free(br_depths);
+                    return false;
+                }
+
+                jit_free(br_depths);
+                break;
+
+#if WASM_ENABLE_FAST_INTERP == 0
+            case EXT_OP_BR_TABLE_CACHE:
+            {
+                BrTableCache *node = bh_list_first_elem(
+                    cc->cur_wasm_module->br_table_cache_list);
+                BrTableCache *node_next;
+                uint8 *p_opcode = frame_ip - 1;
+
+                read_leb_uint32(frame_ip, frame_ip_end, br_count);
+
+                while (node) {
+                    node_next = bh_list_elem_next(node);
+                    if (node->br_table_op_addr == p_opcode) {
+                        br_depths = node->br_depths;
+                        if (!jit_compile_op_br_table(cc, br_depths, br_count,
+                                                     &frame_ip)) {
+                            return false;
+                        }
+                        break;
+                    }
+                    node = node_next;
+                }
+                bh_assert(node);
+
+                break;
+            }
+#endif
+
+            case WASM_OP_RETURN:
+                if (!jit_compile_op_return(cc, &frame_ip))
+                    return false;
+                break;
+
+            case WASM_OP_CALL:
+                read_leb_uint32(frame_ip, frame_ip_end, func_idx);
+                if (!jit_compile_op_call(cc, func_idx, false))
+                    return false;
+                break;
+
+            case WASM_OP_CALL_INDIRECT:
+            {
+                uint32 tbl_idx;
+
+                read_leb_uint32(frame_ip, frame_ip_end, type_idx);
+
+#if WASM_ENABLE_REF_TYPES != 0
+                read_leb_uint32(frame_ip, frame_ip_end, tbl_idx);
+#else
+                frame_ip++;
+                tbl_idx = 0;
+#endif
+
+                if (!jit_compile_op_call_indirect(cc, type_idx, tbl_idx))
+                    return false;
+                break;
+            }
+
+#if WASM_ENABLE_TAIL_CALL != 0
+            case WASM_OP_RETURN_CALL:
+                if (!cc->enable_tail_call) {
+                    jit_set_last_error(cc, "unsupported opcode");
+                    return false;
+                }
+                read_leb_uint32(frame_ip, frame_ip_end, func_idx);
+                if (!jit_compile_op_call(cc, func_idx, true))
+                    return false;
+                if (!jit_compile_op_return(cc, &frame_ip))
+                    return false;
+                break;
+
+            case WASM_OP_RETURN_CALL_INDIRECT:
+            {
+                uint32 tbl_idx;
+
+                if (!cc->enable_tail_call) {
+                    jit_set_last_error(cc, "unsupported opcode");
+                    return false;
+                }
+
+                read_leb_uint32(frame_ip, frame_ip_end, type_idx);
+#if WASM_ENABLE_REF_TYPES != 0
+                read_leb_uint32(frame_ip, frame_ip_end, tbl_idx);
+#else
+                frame_ip++;
+                tbl_idx = 0;
+#endif
+
+                if (!jit_compile_op_call_indirect(cc, type_idx, tbl_idx))
+                    return false;
+                if (!jit_compile_op_return(cc, &frame_ip))
+                    return false;
+                break;
+            }
+#endif /* end of WASM_ENABLE_TAIL_CALL */
+
+            case WASM_OP_DROP:
+                if (!jit_compile_op_drop(cc, true))
+                    return false;
+                break;
+
+            case WASM_OP_DROP_64:
+                if (!jit_compile_op_drop(cc, false))
+                    return false;
+                break;
+
+            case WASM_OP_SELECT:
+                if (!jit_compile_op_select(cc, true))
+                    return false;
+                break;
+
+            case WASM_OP_SELECT_64:
+                if (!jit_compile_op_select(cc, false))
+                    return false;
+                break;
+
+#if WASM_ENABLE_REF_TYPES != 0
+            case WASM_OP_SELECT_T:
+            {
+                uint32 vec_len;
+
+                read_leb_uint32(frame_ip, frame_ip_end, vec_len);
+                bh_assert(vec_len == 1);
+                (void)vec_len;
+
+                type_idx = *frame_ip++;
+                if (!jit_compile_op_select(cc,
+                                           (type_idx != VALUE_TYPE_I64)
+                                               && (type_idx != VALUE_TYPE_F64)))
+                    return false;
+                break;
+            }
+            case WASM_OP_TABLE_GET:
+            {
+                uint32 tbl_idx;
+
+                read_leb_uint32(frame_ip, frame_ip_end, tbl_idx);
+                if (!jit_compile_op_table_get(cc, tbl_idx))
+                    return false;
+                break;
+            }
+            case WASM_OP_TABLE_SET:
+            {
+                uint32 tbl_idx;
+
+                read_leb_uint32(frame_ip, frame_ip_end, tbl_idx);
+                if (!jit_compile_op_table_set(cc, tbl_idx))
+                    return false;
+                break;
+            }
+            case WASM_OP_REF_NULL:
+            {
+                uint32 ref_type;
+                read_leb_uint32(frame_ip, frame_ip_end, ref_type);
+                if (!jit_compile_op_ref_null(cc, ref_type))
+                    return false;
+                break;
+            }
+            case WASM_OP_REF_IS_NULL:
+            {
+                if (!jit_compile_op_ref_is_null(cc))
+                    return false;
+                break;
+            }
+            case WASM_OP_REF_FUNC:
+            {
+                read_leb_uint32(frame_ip, frame_ip_end, func_idx);
+                if (!jit_compile_op_ref_func(cc, func_idx))
+                    return false;
+                break;
+            }
+#endif
+
+            case WASM_OP_GET_LOCAL:
+                read_leb_uint32(frame_ip, frame_ip_end, local_idx);
+                if (!jit_compile_op_get_local(cc, local_idx))
+                    return false;
+                break;
+
+            case WASM_OP_SET_LOCAL:
+                read_leb_uint32(frame_ip, frame_ip_end, local_idx);
+                if (!jit_compile_op_set_local(cc, local_idx))
+                    return false;
+                break;
+
+            case WASM_OP_TEE_LOCAL:
+                read_leb_uint32(frame_ip, frame_ip_end, local_idx);
+                if (!jit_compile_op_tee_local(cc, local_idx))
+                    return false;
+                break;
+
+            case WASM_OP_GET_GLOBAL:
+            case WASM_OP_GET_GLOBAL_64:
+                read_leb_uint32(frame_ip, frame_ip_end, global_idx);
+                if (!jit_compile_op_get_global(cc, global_idx))
+                    return false;
+                break;
+
+            case WASM_OP_SET_GLOBAL:
+            case WASM_OP_SET_GLOBAL_64:
+            case WASM_OP_SET_GLOBAL_AUX_STACK:
+                read_leb_uint32(frame_ip, frame_ip_end, global_idx);
+                if (!jit_compile_op_set_global(
+                        cc, global_idx,
+                        opcode == WASM_OP_SET_GLOBAL_AUX_STACK ? true : false))
+                    return false;
+                break;
+
+            case WASM_OP_I32_LOAD:
+                bytes = 4;
+                sign = true;
+                goto op_i32_load;
+            case WASM_OP_I32_LOAD8_S:
+            case WASM_OP_I32_LOAD8_U:
+                bytes = 1;
+                sign = (opcode == WASM_OP_I32_LOAD8_S) ? true : false;
+                goto op_i32_load;
+            case WASM_OP_I32_LOAD16_S:
+            case WASM_OP_I32_LOAD16_U:
+                bytes = 2;
+                sign = (opcode == WASM_OP_I32_LOAD16_S) ? true : false;
+            op_i32_load:
+                read_leb_uint32(frame_ip, frame_ip_end, align);
+                read_leb_uint32(frame_ip, frame_ip_end, offset);
+                if (!jit_compile_op_i32_load(cc, align, offset, bytes, sign,
+                                             false))
+                    return false;
+                break;
+
+            case WASM_OP_I64_LOAD:
+                bytes = 8;
+                sign = true;
+                goto op_i64_load;
+            case WASM_OP_I64_LOAD8_S:
+            case WASM_OP_I64_LOAD8_U:
+                bytes = 1;
+                sign = (opcode == WASM_OP_I64_LOAD8_S) ? true : false;
+                goto op_i64_load;
+            case WASM_OP_I64_LOAD16_S:
+            case WASM_OP_I64_LOAD16_U:
+                bytes = 2;
+                sign = (opcode == WASM_OP_I64_LOAD16_S) ? true : false;
+                goto op_i64_load;
+            case WASM_OP_I64_LOAD32_S:
+            case WASM_OP_I64_LOAD32_U:
+                bytes = 4;
+                sign = (opcode == WASM_OP_I64_LOAD32_S) ? true : false;
+            op_i64_load:
+                read_leb_uint32(frame_ip, frame_ip_end, align);
+                read_leb_uint32(frame_ip, frame_ip_end, offset);
+                if (!jit_compile_op_i64_load(cc, align, offset, bytes, sign,
+                                             false))
+                    return false;
+                break;
+
+            case WASM_OP_F32_LOAD:
+                read_leb_uint32(frame_ip, frame_ip_end, align);
+                read_leb_uint32(frame_ip, frame_ip_end, offset);
+                if (!jit_compile_op_f32_load(cc, align, offset))
+                    return false;
+                break;
+
+            case WASM_OP_F64_LOAD:
+                read_leb_uint32(frame_ip, frame_ip_end, align);
+                read_leb_uint32(frame_ip, frame_ip_end, offset);
+                if (!jit_compile_op_f64_load(cc, align, offset))
+                    return false;
+                break;
+
+            case WASM_OP_I32_STORE:
+                bytes = 4;
+                goto op_i32_store;
+            case WASM_OP_I32_STORE8:
+                bytes = 1;
+                goto op_i32_store;
+            case WASM_OP_I32_STORE16:
+                bytes = 2;
+            op_i32_store:
+                read_leb_uint32(frame_ip, frame_ip_end, align);
+                read_leb_uint32(frame_ip, frame_ip_end, offset);
+                if (!jit_compile_op_i32_store(cc, align, offset, bytes, false))
+                    return false;
+                break;
+
+            case WASM_OP_I64_STORE:
+                bytes = 8;
+                goto op_i64_store;
+            case WASM_OP_I64_STORE8:
+                bytes = 1;
+                goto op_i64_store;
+            case WASM_OP_I64_STORE16:
+                bytes = 2;
+                goto op_i64_store;
+            case WASM_OP_I64_STORE32:
+                bytes = 4;
+            op_i64_store:
+                read_leb_uint32(frame_ip, frame_ip_end, align);
+                read_leb_uint32(frame_ip, frame_ip_end, offset);
+                if (!jit_compile_op_i64_store(cc, align, offset, bytes, false))
+                    return false;
+                break;
+
+            case WASM_OP_F32_STORE:
+                read_leb_uint32(frame_ip, frame_ip_end, align);
+                read_leb_uint32(frame_ip, frame_ip_end, offset);
+                if (!jit_compile_op_f32_store(cc, align, offset))
+                    return false;
+                break;
+
+            case WASM_OP_F64_STORE:
+                read_leb_uint32(frame_ip, frame_ip_end, align);
+                read_leb_uint32(frame_ip, frame_ip_end, offset);
+                if (!jit_compile_op_f64_store(cc, align, offset))
+                    return false;
+                break;
+
+            case WASM_OP_MEMORY_SIZE:
+                read_leb_uint32(frame_ip, frame_ip_end, mem_idx);
+                if (!jit_compile_op_memory_size(cc, mem_idx))
+                    return false;
+                break;
+
+            case WASM_OP_MEMORY_GROW:
+                read_leb_uint32(frame_ip, frame_ip_end, mem_idx);
+                if (!jit_compile_op_memory_grow(cc, mem_idx))
+                    return false;
+                break;
+
+            case WASM_OP_I32_CONST:
+                read_leb_int32(frame_ip, frame_ip_end, i32_const);
+                if (!jit_compile_op_i32_const(cc, i32_const))
+                    return false;
+                break;
+
+            case WASM_OP_I64_CONST:
+                read_leb_int64(frame_ip, frame_ip_end, i64_const);
+                if (!jit_compile_op_i64_const(cc, i64_const))
+                    return false;
+                break;
+
+            case WASM_OP_F32_CONST:
+                p_f32 = (uint8 *)&f32_const;
+                for (i = 0; i < sizeof(float32); i++)
+                    *p_f32++ = *frame_ip++;
+                if (!jit_compile_op_f32_const(cc, f32_const))
+                    return false;
+                break;
+
+            case WASM_OP_F64_CONST:
+                p_f64 = (uint8 *)&f64_const;
+                for (i = 0; i < sizeof(float64); i++)
+                    *p_f64++ = *frame_ip++;
+                if (!jit_compile_op_f64_const(cc, f64_const))
+                    return false;
+                break;
+
+            case WASM_OP_I32_EQZ:
+            case WASM_OP_I32_EQ:
+            case WASM_OP_I32_NE:
+            case WASM_OP_I32_LT_S:
+            case WASM_OP_I32_LT_U:
+            case WASM_OP_I32_GT_S:
+            case WASM_OP_I32_GT_U:
+            case WASM_OP_I32_LE_S:
+            case WASM_OP_I32_LE_U:
+            case WASM_OP_I32_GE_S:
+            case WASM_OP_I32_GE_U:
+                if (!jit_compile_op_i32_compare(cc, INT_EQZ + opcode
+                                                        - WASM_OP_I32_EQZ))
+                    return false;
+                if (frame_ip < frame_ip_end) {
+                    /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */
+                    if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF)
+                        merge_cmp_and_if = true;
+                    if (*frame_ip == WASM_OP_BR_IF)
+                        merge_cmp_and_br_if = true;
+                }
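+                /* A sketch of what the two flags above enable, assuming the
+                   control emitters honor them (the same applies to the
+                   i64/f32/f64 compare cases below): for bytecode such as
+                   `i32.lt_s` followed by `br_if`, instead of materializing
+                   the comparison result and re-testing it, i.e.
+                       CMP  lhs, rhs
+                       SELECTLTS res, 1, 0
+                       CMP  res, 0
+                       BNE  <target>
+                   the compare's condition is branched on directly:
+                       CMP  lhs, rhs
+                       BLTS <target> */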
+                break;
+
+            case WASM_OP_I64_EQZ:
+            case WASM_OP_I64_EQ:
+            case WASM_OP_I64_NE:
+            case WASM_OP_I64_LT_S:
+            case WASM_OP_I64_LT_U:
+            case WASM_OP_I64_GT_S:
+            case WASM_OP_I64_GT_U:
+            case WASM_OP_I64_LE_S:
+            case WASM_OP_I64_LE_U:
+            case WASM_OP_I64_GE_S:
+            case WASM_OP_I64_GE_U:
+                if (!jit_compile_op_i64_compare(cc, INT_EQZ + opcode
+                                                        - WASM_OP_I64_EQZ))
+                    return false;
+                if (frame_ip < frame_ip_end) {
+                    /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */
+                    if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF)
+                        merge_cmp_and_if = true;
+                    if (*frame_ip == WASM_OP_BR_IF)
+                        merge_cmp_and_br_if = true;
+                }
+                break;
+
+            case WASM_OP_F32_EQ:
+            case WASM_OP_F32_NE:
+            case WASM_OP_F32_LT:
+            case WASM_OP_F32_GT:
+            case WASM_OP_F32_LE:
+            case WASM_OP_F32_GE:
+                if (!jit_compile_op_f32_compare(cc, FLOAT_EQ + opcode
+                                                        - WASM_OP_F32_EQ))
+                    return false;
+                if (frame_ip < frame_ip_end) {
+                    /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */
+                    if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF)
+                        merge_cmp_and_if = true;
+                    if (*frame_ip == WASM_OP_BR_IF)
+                        merge_cmp_and_br_if = true;
+                }
+                break;
+
+            case WASM_OP_F64_EQ:
+            case WASM_OP_F64_NE:
+            case WASM_OP_F64_LT:
+            case WASM_OP_F64_GT:
+            case WASM_OP_F64_LE:
+            case WASM_OP_F64_GE:
+                if (!jit_compile_op_f64_compare(cc, FLOAT_EQ + opcode
+                                                        - WASM_OP_F64_EQ))
+                    return false;
+                if (frame_ip < frame_ip_end) {
+                    /* Merge `CMP, SELECTcc, CMP, BNE` insns into `CMP, Bcc` */
+                    if (*frame_ip == WASM_OP_IF || *frame_ip == EXT_OP_IF)
+                        merge_cmp_and_if = true;
+                    if (*frame_ip == WASM_OP_BR_IF)
+                        merge_cmp_and_br_if = true;
+                }
+                break;
+
+            case WASM_OP_I32_CLZ:
+                if (!jit_compile_op_i32_clz(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I32_CTZ:
+                if (!jit_compile_op_i32_ctz(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I32_POPCNT:
+                if (!jit_compile_op_i32_popcnt(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I32_ADD:
+            case WASM_OP_I32_SUB:
+            case WASM_OP_I32_MUL:
+            case WASM_OP_I32_DIV_S:
+            case WASM_OP_I32_DIV_U:
+            case WASM_OP_I32_REM_S:
+            case WASM_OP_I32_REM_U:
+                if (!jit_compile_op_i32_arithmetic(
+                        cc, INT_ADD + opcode - WASM_OP_I32_ADD, &frame_ip))
+                    return false;
+                break;
+
+            case WASM_OP_I32_AND:
+            case WASM_OP_I32_OR:
+            case WASM_OP_I32_XOR:
+                if (!jit_compile_op_i32_bitwise(cc, INT_AND + opcode
+                                                        - WASM_OP_I32_AND))
+                    return false;
+                break;
+
+            case WASM_OP_I32_SHL:
+            case WASM_OP_I32_SHR_S:
+            case WASM_OP_I32_SHR_U:
+            case WASM_OP_I32_ROTL:
+            case WASM_OP_I32_ROTR:
+                if (!jit_compile_op_i32_shift(cc, INT_SHL + opcode
+                                                      - WASM_OP_I32_SHL))
+                    return false;
+                break;
+
+            case WASM_OP_I64_CLZ:
+                if (!jit_compile_op_i64_clz(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I64_CTZ:
+                if (!jit_compile_op_i64_ctz(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I64_POPCNT:
+                if (!jit_compile_op_i64_popcnt(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I64_ADD:
+            case WASM_OP_I64_SUB:
+            case WASM_OP_I64_MUL:
+            case WASM_OP_I64_DIV_S:
+            case WASM_OP_I64_DIV_U:
+            case WASM_OP_I64_REM_S:
+            case WASM_OP_I64_REM_U:
+                if (!jit_compile_op_i64_arithmetic(
+                        cc, INT_ADD + opcode - WASM_OP_I64_ADD, &frame_ip))
+                    return false;
+                break;
+
+            case WASM_OP_I64_AND:
+            case WASM_OP_I64_OR:
+            case WASM_OP_I64_XOR:
+                if (!jit_compile_op_i64_bitwise(cc, INT_AND + opcode
+                                                        - WASM_OP_I64_AND))
+                    return false;
+                break;
+
+            case WASM_OP_I64_SHL:
+            case WASM_OP_I64_SHR_S:
+            case WASM_OP_I64_SHR_U:
+            case WASM_OP_I64_ROTL:
+            case WASM_OP_I64_ROTR:
+                if (!jit_compile_op_i64_shift(cc, INT_SHL + opcode
+                                                      - WASM_OP_I64_SHL))
+                    return false;
+                break;
+
+            case WASM_OP_F32_ABS:
+            case WASM_OP_F32_NEG:
+            case WASM_OP_F32_CEIL:
+            case WASM_OP_F32_FLOOR:
+            case WASM_OP_F32_TRUNC:
+            case WASM_OP_F32_NEAREST:
+            case WASM_OP_F32_SQRT:
+                if (!jit_compile_op_f32_math(cc, FLOAT_ABS + opcode
+                                                     - WASM_OP_F32_ABS))
+                    return false;
+                break;
+
+            case WASM_OP_F32_ADD:
+            case WASM_OP_F32_SUB:
+            case WASM_OP_F32_MUL:
+            case WASM_OP_F32_DIV:
+            case WASM_OP_F32_MIN:
+            case WASM_OP_F32_MAX:
+                if (!jit_compile_op_f32_arithmetic(cc, FLOAT_ADD + opcode
+                                                           - WASM_OP_F32_ADD))
+                    return false;
+                break;
+
+            case WASM_OP_F32_COPYSIGN:
+                if (!jit_compile_op_f32_copysign(cc))
+                    return false;
+                break;
+
+            case WASM_OP_F64_ABS:
+            case WASM_OP_F64_NEG:
+            case WASM_OP_F64_CEIL:
+            case WASM_OP_F64_FLOOR:
+            case WASM_OP_F64_TRUNC:
+            case WASM_OP_F64_NEAREST:
+            case WASM_OP_F64_SQRT:
+                if (!jit_compile_op_f64_math(cc, FLOAT_ABS + opcode
+                                                     - WASM_OP_F64_ABS))
+                    return false;
+                break;
+
+            case WASM_OP_F64_ADD:
+            case WASM_OP_F64_SUB:
+            case WASM_OP_F64_MUL:
+            case WASM_OP_F64_DIV:
+            case WASM_OP_F64_MIN:
+            case WASM_OP_F64_MAX:
+                if (!jit_compile_op_f64_arithmetic(cc, FLOAT_ADD + opcode
+                                                           - WASM_OP_F64_ADD))
+                    return false;
+                break;
+
+            case WASM_OP_F64_COPYSIGN:
+                if (!jit_compile_op_f64_copysign(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I32_WRAP_I64:
+                if (!jit_compile_op_i32_wrap_i64(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I32_TRUNC_S_F32:
+            case WASM_OP_I32_TRUNC_U_F32:
+                sign = (opcode == WASM_OP_I32_TRUNC_S_F32) ? true : false;
+                if (!jit_compile_op_i32_trunc_f32(cc, sign, false))
+                    return false;
+                break;
+
+            case WASM_OP_I32_TRUNC_S_F64:
+            case WASM_OP_I32_TRUNC_U_F64:
+                sign = (opcode == WASM_OP_I32_TRUNC_S_F64) ? true : false;
+                if (!jit_compile_op_i32_trunc_f64(cc, sign, false))
+                    return false;
+                break;
+
+            case WASM_OP_I64_EXTEND_S_I32:
+            case WASM_OP_I64_EXTEND_U_I32:
+                sign = (opcode == WASM_OP_I64_EXTEND_S_I32) ? true : false;
+                if (!jit_compile_op_i64_extend_i32(cc, sign))
+                    return false;
+                break;
+
+            case WASM_OP_I64_TRUNC_S_F32:
+            case WASM_OP_I64_TRUNC_U_F32:
+                sign = (opcode == WASM_OP_I64_TRUNC_S_F32) ? true : false;
+                if (!jit_compile_op_i64_trunc_f32(cc, sign, false))
+                    return false;
+                break;
+
+            case WASM_OP_I64_TRUNC_S_F64:
+            case WASM_OP_I64_TRUNC_U_F64:
+                sign = (opcode == WASM_OP_I64_TRUNC_S_F64) ? true : false;
+                if (!jit_compile_op_i64_trunc_f64(cc, sign, false))
+                    return false;
+                break;
+
+            case WASM_OP_F32_CONVERT_S_I32:
+            case WASM_OP_F32_CONVERT_U_I32:
+                sign = (opcode == WASM_OP_F32_CONVERT_S_I32) ? true : false;
+                if (!jit_compile_op_f32_convert_i32(cc, sign))
+                    return false;
+                break;
+
+            case WASM_OP_F32_CONVERT_S_I64:
+            case WASM_OP_F32_CONVERT_U_I64:
+                sign = (opcode == WASM_OP_F32_CONVERT_S_I64) ? true : false;
+                if (!jit_compile_op_f32_convert_i64(cc, sign))
+                    return false;
+                break;
+
+            case WASM_OP_F32_DEMOTE_F64:
+                if (!jit_compile_op_f32_demote_f64(cc))
+                    return false;
+                break;
+
+            case WASM_OP_F64_CONVERT_S_I32:
+            case WASM_OP_F64_CONVERT_U_I32:
+                sign = (opcode == WASM_OP_F64_CONVERT_S_I32) ? true : false;
+                if (!jit_compile_op_f64_convert_i32(cc, sign))
+                    return false;
+                break;
+
+            case WASM_OP_F64_CONVERT_S_I64:
+            case WASM_OP_F64_CONVERT_U_I64:
+                sign = (opcode == WASM_OP_F64_CONVERT_S_I64) ? true : false;
+                if (!jit_compile_op_f64_convert_i64(cc, sign))
+                    return false;
+                break;
+
+            case WASM_OP_F64_PROMOTE_F32:
+                if (!jit_compile_op_f64_promote_f32(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I32_REINTERPRET_F32:
+                if (!jit_compile_op_i32_reinterpret_f32(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I64_REINTERPRET_F64:
+                if (!jit_compile_op_i64_reinterpret_f64(cc))
+                    return false;
+                break;
+
+            case WASM_OP_F32_REINTERPRET_I32:
+                if (!jit_compile_op_f32_reinterpret_i32(cc))
+                    return false;
+                break;
+
+            case WASM_OP_F64_REINTERPRET_I64:
+                if (!jit_compile_op_f64_reinterpret_i64(cc))
+                    return false;
+                break;
+
+            case WASM_OP_I32_EXTEND8_S:
+                if (!jit_compile_op_i32_extend_i32(cc, 8))
+                    return false;
+                break;
+
+            case WASM_OP_I32_EXTEND16_S:
+                if (!jit_compile_op_i32_extend_i32(cc, 16))
+                    return false;
+                break;
+
+            case WASM_OP_I64_EXTEND8_S:
+                if (!jit_compile_op_i64_extend_i64(cc, 8))
+                    return false;
+                break;
+
+            case WASM_OP_I64_EXTEND16_S:
+                if (!jit_compile_op_i64_extend_i64(cc, 16))
+                    return false;
+                break;
+
+            case WASM_OP_I64_EXTEND32_S:
+                if (!jit_compile_op_i64_extend_i64(cc, 32))
+                    return false;
+                break;
+
+            case WASM_OP_MISC_PREFIX:
+            {
+                uint32 opcode1;
+
+                read_leb_uint32(frame_ip, frame_ip_end, opcode1);
+                opcode = (uint32)opcode1;
+
+                switch (opcode) {
+                    case WASM_OP_I32_TRUNC_SAT_S_F32:
+                    case WASM_OP_I32_TRUNC_SAT_U_F32:
+                        sign = (opcode == WASM_OP_I32_TRUNC_SAT_S_F32) ? true
+                                                                       : false;
+                        if (!jit_compile_op_i32_trunc_f32(cc, sign, true))
+                            return false;
+                        break;
+                    case WASM_OP_I32_TRUNC_SAT_S_F64:
+                    case WASM_OP_I32_TRUNC_SAT_U_F64:
+                        sign = (opcode == WASM_OP_I32_TRUNC_SAT_S_F64) ? true
+                                                                       : false;
+                        if (!jit_compile_op_i32_trunc_f64(cc, sign, true))
+                            return false;
+                        break;
+                    case WASM_OP_I64_TRUNC_SAT_S_F32:
+                    case WASM_OP_I64_TRUNC_SAT_U_F32:
+                        sign = (opcode == WASM_OP_I64_TRUNC_SAT_S_F32) ? true
+                                                                       : false;
+                        if (!jit_compile_op_i64_trunc_f32(cc, sign, true))
+                            return false;
+                        break;
+                    case WASM_OP_I64_TRUNC_SAT_S_F64:
+                    case WASM_OP_I64_TRUNC_SAT_U_F64:
+                        sign = (opcode == WASM_OP_I64_TRUNC_SAT_S_F64) ? true
+                                                                       : false;
+                        if (!jit_compile_op_i64_trunc_f64(cc, sign, true))
+                            return false;
+                        break;
+#if WASM_ENABLE_BULK_MEMORY != 0
+                    case WASM_OP_MEMORY_INIT:
+                    {
+                        uint32 seg_idx = 0;
+                        read_leb_uint32(frame_ip, frame_ip_end, seg_idx);
+                        read_leb_uint32(frame_ip, frame_ip_end, mem_idx);
+                        if (!jit_compile_op_memory_init(cc, mem_idx, seg_idx))
+                            return false;
+                        break;
+                    }
+                    case WASM_OP_DATA_DROP:
+                    {
+                        uint32 seg_idx;
+                        read_leb_uint32(frame_ip, frame_ip_end, seg_idx);
+                        if (!jit_compile_op_data_drop(cc, seg_idx))
+                            return false;
+                        break;
+                    }
+                    case WASM_OP_MEMORY_COPY:
+                    {
+                        uint32 src_mem_idx, dst_mem_idx;
+                        read_leb_uint32(frame_ip, frame_ip_end, src_mem_idx);
+                        read_leb_uint32(frame_ip, frame_ip_end, dst_mem_idx);
+                        if (!jit_compile_op_memory_copy(cc, src_mem_idx,
+                                                        dst_mem_idx))
+                            return false;
+                        break;
+                    }
+                    case WASM_OP_MEMORY_FILL:
+                    {
+                        read_leb_uint32(frame_ip, frame_ip_end, mem_idx);
+                        if (!jit_compile_op_memory_fill(cc, mem_idx))
+                            return false;
+                        break;
+                    }
+#endif /* WASM_ENABLE_BULK_MEMORY */
+#if WASM_ENABLE_REF_TYPES != 0
+                    case WASM_OP_TABLE_INIT:
+                    {
+                        uint32 tbl_idx, tbl_seg_idx;
+
+                        read_leb_uint32(frame_ip, frame_ip_end, tbl_seg_idx);
+                        read_leb_uint32(frame_ip, frame_ip_end, tbl_idx);
+                        if (!jit_compile_op_table_init(cc, tbl_idx,
+                                                       tbl_seg_idx))
+                            return false;
+                        break;
+                    }
+                    case WASM_OP_ELEM_DROP:
+                    {
+                        uint32 tbl_seg_idx;
+
+                        read_leb_uint32(frame_ip, frame_ip_end, tbl_seg_idx);
+                        if (!jit_compile_op_elem_drop(cc, tbl_seg_idx))
+                            return false;
+                        break;
+                    }
+                    case WASM_OP_TABLE_COPY:
+                    {
+                        uint32 src_tbl_idx, dst_tbl_idx;
+
+                        read_leb_uint32(frame_ip, frame_ip_end, dst_tbl_idx);
+                        read_leb_uint32(frame_ip, frame_ip_end, src_tbl_idx);
+                        if (!jit_compile_op_table_copy(cc, src_tbl_idx,
+                                                       dst_tbl_idx))
+                            return false;
+                        break;
+                    }
+                    case WASM_OP_TABLE_GROW:
+                    {
+                        uint32 tbl_idx;
+
+                        read_leb_uint32(frame_ip, frame_ip_end, tbl_idx);
+                        if (!jit_compile_op_table_grow(cc, tbl_idx))
+                            return false;
+                        break;
+                    }
+
+                    case WASM_OP_TABLE_SIZE:
+                    {
+                        uint32 tbl_idx;
+
+                        read_leb_uint32(frame_ip, frame_ip_end, tbl_idx);
+                        if (!jit_compile_op_table_size(cc, tbl_idx))
+                            return false;
+                        break;
+                    }
+                    case WASM_OP_TABLE_FILL:
+                    {
+                        uint32 tbl_idx;
+
+                        read_leb_uint32(frame_ip, frame_ip_end, tbl_idx);
+                        if (!jit_compile_op_table_fill(cc, tbl_idx))
+                            return false;
+                        break;
+                    }
+#endif /* WASM_ENABLE_REF_TYPES */
+                    default:
+                        jit_set_last_error(cc, "unsupported opcode");
+                        return false;
+                }
+                break;
+            }
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+            case WASM_OP_ATOMIC_PREFIX:
+            {
+                uint8 bin_op, op_type;
+
+                if (frame_ip < frame_ip_end) {
+                    opcode = *frame_ip++;
+                }
+                if (opcode != WASM_OP_ATOMIC_FENCE) {
+                    read_leb_uint32(frame_ip, frame_ip_end, align);
+                    read_leb_uint32(frame_ip, frame_ip_end, offset);
+                }
+                switch (opcode) {
+                    case WASM_OP_ATOMIC_WAIT32:
+                        if (!jit_compile_op_atomic_wait(cc, VALUE_TYPE_I32,
+                                                        align, offset, 4))
+                            return false;
+                        break;
+                    case WASM_OP_ATOMIC_WAIT64:
+                        if (!jit_compile_op_atomic_wait(cc, VALUE_TYPE_I64,
+                                                        align, offset, 8))
+                            return false;
+                        break;
+                    case WASM_OP_ATOMIC_NOTIFY:
+                        /* atomic.notify checks a 32-bit cell */
+                        bytes = 4;
+                        if (!jit_compiler_op_atomic_notify(cc, align, offset,
+                                                           bytes))
+                            return false;
+                        break;
+                    case WASM_OP_ATOMIC_I32_LOAD:
+                        bytes = 4;
+                        goto op_atomic_i32_load;
+                    case WASM_OP_ATOMIC_I32_LOAD8_U:
+                        bytes = 1;
+                        goto op_atomic_i32_load;
+                    case WASM_OP_ATOMIC_I32_LOAD16_U:
+                        bytes = 2;
+                    op_atomic_i32_load:
+                        /* atomic loads are full-width or zero-extending */
+                        sign = false;
+                        if (!jit_compile_op_i32_load(cc, align, offset, bytes,
+                                                     sign, true))
+                            return false;
+                        break;
+
+                    case WASM_OP_ATOMIC_I64_LOAD:
+                        bytes = 8;
+                        goto op_atomic_i64_load;
+                    case WASM_OP_ATOMIC_I64_LOAD8_U:
+                        bytes = 1;
+                        goto op_atomic_i64_load;
+                    case WASM_OP_ATOMIC_I64_LOAD16_U:
+                        bytes = 2;
+                        goto op_atomic_i64_load;
+                    case WASM_OP_ATOMIC_I64_LOAD32_U:
+                        bytes = 4;
+                    op_atomic_i64_load:
+                        /* atomic loads are full-width or zero-extending */
+                        sign = false;
+                        if (!jit_compile_op_i64_load(cc, align, offset, bytes,
+                                                     sign, true))
+                            return false;
+                        break;
+
+                    case WASM_OP_ATOMIC_I32_STORE:
+                        bytes = 4;
+                        goto op_atomic_i32_store;
+                    case WASM_OP_ATOMIC_I32_STORE8:
+                        bytes = 1;
+                        goto op_atomic_i32_store;
+                    case WASM_OP_ATOMIC_I32_STORE16:
+                        bytes = 2;
+                    op_atomic_i32_store:
+                        if (!jit_compile_op_i32_store(cc, align, offset, bytes,
+                                                      true))
+                            return false;
+                        break;
+
+                    case WASM_OP_ATOMIC_I64_STORE:
+                        bytes = 8;
+                        goto op_atomic_i64_store;
+                    case WASM_OP_ATOMIC_I64_STORE8:
+                        bytes = 1;
+                        goto op_atomic_i64_store;
+                    case WASM_OP_ATOMIC_I64_STORE16:
+                        bytes = 2;
+                        goto op_atomic_i64_store;
+                    case WASM_OP_ATOMIC_I64_STORE32:
+                        bytes = 4;
+                    op_atomic_i64_store:
+                        if (!jit_compile_op_i64_store(cc, align, offset, bytes,
+                                                      true))
+                            return false;
+                        break;
+
+                    case WASM_OP_ATOMIC_RMW_I32_CMPXCHG:
+                        bytes = 4;
+                        op_type = VALUE_TYPE_I32;
+                        goto op_atomic_cmpxchg;
+                    case WASM_OP_ATOMIC_RMW_I64_CMPXCHG:
+                        bytes = 8;
+                        op_type = VALUE_TYPE_I64;
+                        goto op_atomic_cmpxchg;
+                    case WASM_OP_ATOMIC_RMW_I32_CMPXCHG8_U:
+                        bytes = 1;
+                        op_type = VALUE_TYPE_I32;
+                        goto op_atomic_cmpxchg;
+                    case WASM_OP_ATOMIC_RMW_I32_CMPXCHG16_U:
+                        bytes = 2;
+                        op_type = VALUE_TYPE_I32;
+                        goto op_atomic_cmpxchg;
+                    case WASM_OP_ATOMIC_RMW_I64_CMPXCHG8_U:
+                        bytes = 1;
+                        op_type = VALUE_TYPE_I64;
+                        goto op_atomic_cmpxchg;
+                    case WASM_OP_ATOMIC_RMW_I64_CMPXCHG16_U:
+                        bytes = 2;
+                        op_type = VALUE_TYPE_I64;
+                        goto op_atomic_cmpxchg;
+                    case WASM_OP_ATOMIC_RMW_I64_CMPXCHG32_U:
+                        bytes = 4;
+                        op_type = VALUE_TYPE_I64;
+                    op_atomic_cmpxchg:
+                        if (!jit_compile_op_atomic_cmpxchg(cc, op_type, align,
+                                                           offset, bytes))
+                            return false;
+                        break;
+
+                        /* TODO: the atomic RMW cases; each one is expected
+                           to set bin_op/op_type/bytes and then jump to
+                           build_atomic_rmw below */
+                        /*
+                        COMPILE_ATOMIC_RMW(Add, ADD);
+                        COMPILE_ATOMIC_RMW(Sub, SUB);
+                        COMPILE_ATOMIC_RMW(And, AND);
+                        COMPILE_ATOMIC_RMW(Or, OR);
+                        COMPILE_ATOMIC_RMW(Xor, XOR);
+                        COMPILE_ATOMIC_RMW(Xchg, XCHG);
+                        */
+
+                    build_atomic_rmw:
+                        if (!jit_compile_op_atomic_rmw(cc, bin_op, op_type,
+                                                       align, offset, bytes))
+                            return false;
+                        break;
+
+                    default:
+                        jit_set_last_error(cc, "unsupported opcode");
+                        return false;
+                }
+                break;
+            }
+#endif /* end of WASM_ENABLE_SHARED_MEMORY */
+
+            default:
+                jit_set_last_error(cc, "unsupported opcode");
+                return false;
+        }
+        /* Errors may occur when creating registers, basic blocks, insns,
+           consts and labels, and not every return value is checked above,
+           so check the last error again here */
+        if (jit_get_last_error(cc)) {
+            return false;
+        }
+    }
+
+    (void)func_idx;
+    return true;
+fail:
+    return false;
+}
+
+JitBasicBlock *
+jit_frontend_translate_func(JitCompContext *cc)
+{
+    JitFrame *jit_frame;
+    JitBasicBlock *basic_block_entry;
+
+    if (!(jit_frame = init_func_translation(cc))) {
+        return NULL;
+    }
+
+    if (!(basic_block_entry = create_func_block(cc))) {
+        return NULL;
+    }
+
+    if (!jit_compile_func(cc)) {
+        return NULL;
+    }
+
+    return basic_block_entry;
+}
+
+#if 0
+#if WASM_ENABLE_THREAD_MGR != 0
+bool
+check_suspend_flags(JitCompContext *cc, JITFuncContext *func_ctx)
+{
+    LLVMValueRef terminate_addr, terminate_flags, flag, offset, res;
+    JitBasicBlock *terminate_check_block, *non_terminate_block;
+    JITFuncType *jit_func_type = func_ctx->jit_func->func_type;
+    JitBasicBlock *terminate_block;
+
+    /* Offset of suspend_flags */
+    offset = I32_FIVE;
+
+    if (!(terminate_addr = LLVMBuildInBoundsGEP(
+              cc->builder, func_ctx->exec_env, &offset, 1, "terminate_addr"))) {
+        jit_set_last_error("llvm build in bounds gep failed");
+        return false;
+    }
+    if (!(terminate_addr =
+              LLVMBuildBitCast(cc->builder, terminate_addr, INT32_PTR_TYPE,
+                               "terminate_addr_ptr"))) {
+        jit_set_last_error("llvm build bit cast failed");
+        return false;
+    }
+
+    if (!(terminate_flags =
+              LLVMBuildLoad(cc->builder, terminate_addr, "terminate_flags"))) {
+        jit_set_last_error("llvm build bit cast failed");
+        return false;
+    }
+    /* Set the terminate_flags memory access to volatile, so that the value
+       will always be loaded from memory rather than from a register */
+    LLVMSetVolatile(terminate_flags, true);
+
+    CREATE_BASIC_BLOCK(terminate_check_block, "terminate_check");
+    MOVE_BASIC_BLOCK_AFTER_CURR(terminate_check_block);
+
+    CREATE_BASIC_BLOCK(non_terminate_block, "non_terminate");
+    MOVE_BASIC_BLOCK_AFTER_CURR(non_terminate_block);
+
+    BUILD_ICMP(LLVMIntSGT, terminate_flags, I32_ZERO, res, "need_terminate");
+    BUILD_COND_BR(res, terminate_check_block, non_terminate_block);
+
+    /* Move builder to terminate check block */
+    SET_BUILDER_POS(terminate_check_block);
+
+    CREATE_BASIC_BLOCK(terminate_block, "terminate");
+    MOVE_BASIC_BLOCK_AFTER_CURR(terminate_block);
+
+    if (!(flag = LLVMBuildAnd(cc->builder, terminate_flags, I32_ONE,
+                              "termination_flag"))) {
+        jit_set_last_error("llvm build AND failed");
+        return false;
+    }
+
+    BUILD_ICMP(LLVMIntSGT, flag, I32_ZERO, res, "need_terminate");
+    BUILD_COND_BR(res, terminate_block, non_terminate_block);
+
+    /* Move builder to terminate block */
+    SET_BUILDER_POS(terminate_block);
+    if (!jit_build_zero_function_ret(cc, func_ctx, jit_func_type)) {
+        goto fail;
+    }
+
+    /* Move builder to the non_terminate block */
+    SET_BUILDER_POS(non_terminate_block);
+    return true;
+
+fail:
+    return false;
+}
+#endif /* End of WASM_ENABLE_THREAD_MGR */
+#endif

+ 521 - 0
core/iwasm/fast-jit/jit_frontend.h

@@ -0,0 +1,521 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_FRONTEND_H_
+#define _JIT_FRONTEND_H_
+
+#include "jit_utils.h"
+#include "jit_ir.h"
+#include "../interpreter/wasm_interp.h"
+#if WASM_ENABLE_AOT != 0
+#include "../aot/aot_runtime.h"
+#endif
+
+#if WASM_ENABLE_AOT == 0
+typedef enum IntCond {
+    INT_EQZ = 0,
+    INT_EQ,
+    INT_NE,
+    INT_LT_S,
+    INT_LT_U,
+    INT_GT_S,
+    INT_GT_U,
+    INT_LE_S,
+    INT_LE_U,
+    INT_GE_S,
+    INT_GE_U
+} IntCond;
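+
+/* Note: the member order of IntCond (and of FloatCond below) is assumed
+   to mirror the order of the corresponding Wasm compare opcodes, so the
+   frontend can map an opcode to a condition arithmetically, e.g.
+       cond = INT_EQZ + opcode - WASM_OP_I32_EQZ;
+   reordering either side would silently break that mapping. */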
+
+typedef enum FloatCond {
+    FLOAT_EQ = 0,
+    FLOAT_NE,
+    FLOAT_LT,
+    FLOAT_GT,
+    FLOAT_LE,
+    FLOAT_GE,
+    FLOAT_UNO
+} FloatCond;
+#else
+#define IntCond AOTIntCond
+#define FloatCond AOTFloatCond
+#endif
+
+typedef enum IntArithmetic {
+    INT_ADD = 0,
+    INT_SUB,
+    INT_MUL,
+    INT_DIV_S,
+    INT_DIV_U,
+    INT_REM_S,
+    INT_REM_U
+} IntArithmetic;
+
+typedef enum V128Arithmetic {
+    V128_ADD = 0,
+    V128_SUB,
+    V128_MUL,
+    V128_DIV,
+    V128_NEG,
+    V128_MIN,
+    V128_MAX,
+} V128Arithmetic;
+
+typedef enum IntBitwise {
+    INT_AND = 0,
+    INT_OR,
+    INT_XOR,
+} IntBitwise;
+
+typedef enum V128Bitwise {
+    V128_NOT,
+    V128_AND,
+    V128_ANDNOT,
+    V128_OR,
+    V128_XOR,
+    V128_BITSELECT,
+} V128Bitwise;
+
+typedef enum IntShift {
+    INT_SHL = 0,
+    INT_SHR_S,
+    INT_SHR_U,
+    INT_ROTL,
+    INT_ROTR
+} IntShift;
+
+typedef enum FloatMath {
+    FLOAT_ABS = 0,
+    FLOAT_NEG,
+    FLOAT_CEIL,
+    FLOAT_FLOOR,
+    FLOAT_TRUNC,
+    FLOAT_NEAREST,
+    FLOAT_SQRT
+} FloatMath;
+
+typedef enum FloatArithmetic {
+    FLOAT_ADD = 0,
+    FLOAT_SUB,
+    FLOAT_MUL,
+    FLOAT_DIV,
+    FLOAT_MIN,
+    FLOAT_MAX,
+} FloatArithmetic;
+
+typedef enum JitExceptionID {
+    JIT_EXCE_UNREACHABLE = 0,
+    JIT_EXCE_OUT_OF_MEMORY,
+    JIT_EXCE_OUT_OF_BOUNDS_MEMORY_ACCESS,
+    JIT_EXCE_INTEGER_OVERFLOW,
+    JIT_EXCE_INTEGER_DIVIDE_BY_ZERO,
+    JIT_EXCE_INVALID_CONVERSION_TO_INTEGER,
+    JIT_EXCE_INVALID_FUNCTION_TYPE_INDEX,
+    JIT_EXCE_INVALID_FUNCTION_INDEX,
+    JIT_EXCE_UNDEFINED_ELEMENT,
+    JIT_EXCE_UNINITIALIZED_ELEMENT,
+    JIT_EXCE_CALL_UNLINKED_IMPORT_FUNC,
+    JIT_EXCE_NATIVE_STACK_OVERFLOW,
+    JIT_EXCE_UNALIGNED_ATOMIC,
+    JIT_EXCE_AUX_STACK_OVERFLOW,
+    JIT_EXCE_AUX_STACK_UNDERFLOW,
+    JIT_EXCE_OUT_OF_BOUNDS_TABLE_ACCESS,
+    JIT_EXCE_OPERAND_STACK_OVERFLOW,
+    JIT_EXCE_ALREADY_THROWN,
+    JIT_EXCE_NUM,
+} JitExceptionID;
+
+/**
+ * Translate instructions in a function. The translated block must
+ * end with a branch instruction whose targets are offsets relative to
+ * the end bcip of the translated block, which are integral constants.
+ * If a target of a branch is really a constant value (which should be
+ * rare), put it into a register and then jump to the register instead
+ * of using the constant value directly in the target. In the
+ * translation process, don't create any new labels. The bcips of the
+ * begin and end of the translated block are stored in the
+ * jit_annl_begin_bcip and jit_annl_end_bcip annotations of the label
+ * of the block, which must be the same as the bcips used in
+ * profiling.
+ *
+ * NOTE: the function must explicitly set SP to the correct value when
+ * the entry's bcip is the function's entry address.
+ *
+ * @param cc the compilation context holding the generated IR
+ *
+ * @return the entry IR block containing the translated instructions
+ * if it succeeds, NULL otherwise
+ */
+JitBasicBlock *
+jit_frontend_translate_func(JitCompContext *cc);
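+
+/* A minimal usage sketch (simplified; the actual driver presumably lives
+   in jit_compiler.c's compilation passes):
+
+       JitBasicBlock *entry = jit_frontend_translate_func(cc);
+       if (!entry)
+           return false;   // the failure reason is recorded in cc
+       // the IR is now ready for the later passes (lowering, register
+       // allocation, code generation)
+*/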
+
+/**
+ * Generate a block leaving the compiled code, which must store the
+ * target bcip and other necessary information for switching to
+ * interpreter or other compiled code and then jump to the exit of the
+ * cc.
+ *
+ * @param cc the compilation context
+ * @param bcip the target bytecode instruction pointer
+ * @param sp_offset stack pointer offset at the beginning of the block
+ *
+ * @return the leaving block if succeeds, NULL otherwise
+ */
+JitBlock *
+jit_frontend_gen_leaving_block(JitCompContext *cc, void *bcip,
+                               unsigned sp_offset);
+
+/**
+ * Lower the IR of the given compilation context.
+ *
+ * @param cc the compilation context
+ *
+ * @return true if succeeds, false otherwise
+ */
+bool
+jit_frontend_lower(JitCompContext *cc);
+
+JitReg
+get_module_inst_reg(JitFrame *frame);
+
+JitReg
+get_module_reg(JitFrame *frame);
+
+JitReg
+get_fast_jit_func_ptrs_reg(JitFrame *frame);
+
+JitReg
+get_global_data_reg(JitFrame *frame);
+
+JitReg
+get_aux_stack_bound_reg(JitFrame *frame);
+
+JitReg
+get_aux_stack_bottom_reg(JitFrame *frame);
+
+JitReg
+get_memories_reg(JitFrame *frame);
+
+JitReg
+get_memory_inst_reg(JitFrame *frame, uint32 mem_idx);
+
+JitReg
+get_memory_data_reg(JitFrame *frame, uint32 mem_idx);
+
+JitReg
+get_memory_data_end_reg(JitFrame *frame, uint32 mem_idx);
+
+JitReg
+get_mem_bound_check_1byte_reg(JitFrame *frame, uint32 mem_idx);
+
+JitReg
+get_mem_bound_check_2bytes_reg(JitFrame *frame, uint32 mem_idx);
+
+JitReg
+get_mem_bound_check_4bytes_reg(JitFrame *frame, uint32 mem_idx);
+
+JitReg
+get_mem_bound_check_8bytes_reg(JitFrame *frame, uint32 mem_idx);
+
+JitReg
+get_mem_bound_check_16bytes_reg(JitFrame *frame, uint32 mem_idx);
+
+JitReg
+get_tables_reg(JitFrame *frame);
+
+JitReg
+get_table_inst_reg(JitFrame *frame, uint32 table_idx);
+
+JitReg
+get_table_data_reg(JitFrame *frame, uint32 table_idx);
+
+JitReg
+get_table_cur_size_reg(JitFrame *frame, uint32 table_idx);
+
+void
+clear_fixed_virtual_regs(JitFrame *frame);
+
+void
+clear_memory_regs(JitFrame *frame);
+
+void
+clear_table_regs(JitFrame *frame);
+
+/**
+ * Get the offset from frame pointer to the n-th local variable slot.
+ *
+ * @param n the index to the local variable array
+ *
+ * @return the offset from frame pointer to the local variable slot
+ */
+static inline unsigned
+offset_of_local(unsigned n)
+{
+    return offsetof(WASMInterpFrame, lp) + n * 4;
+}
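+
+/* Example: with 4-byte slots, local 0 is at lp + 0 and local 1 at lp + 4;
+   an i64/f64 local occupies two consecutive slots, so one starting at
+   slot n spans offsets n * 4 .. n * 4 + 7. */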
+
+/**
+ * Generate instruction to load an integer from the frame.
+ *
+ * This and the below gen_load_X functions generate instructions to
+ * load values from the frame into registers if the values have not
+ * been loaded yet.
+ *
+ * @param frame the frame information
+ * @param n slot index to the local variable array
+ *
+ * @return register holding the loaded value
+ */
+JitReg
+gen_load_i32(JitFrame *frame, unsigned n);
+
+/**
+ * Generate instruction to load a i64 integer from the frame.
+ *
+ * @param frame the frame information
+ * @param n slot index to the local variable array
+ *
+ * @return register holding the loaded value
+ */
+JitReg
+gen_load_i64(JitFrame *frame, unsigned n);
+
+/**
+ * Generate instruction to load a floating point value from the frame.
+ *
+ * @param frame the frame information
+ * @param n slot index to the local variable array
+ *
+ * @return register holding the loaded value
+ */
+JitReg
+gen_load_f32(JitFrame *frame, unsigned n);
+
+/**
+ * Generate instruction to load a double value from the frame.
+ *
+ * @param frame the frame information
+ * @param n slot index to the local variable array
+ *
+ * @return register holding the loaded value
+ */
+JitReg
+gen_load_f64(JitFrame *frame, unsigned n);
+
+/**
+ * Generate instructions to commit computation result to the frame.
+ * The general principle is to only commit values that will be used
+ * through the frame.
+ *
+ * @param frame the frame information
+ * @param begin the begin value slot to commit
+ * @param end the end value slot to commit
+ */
+void
+gen_commit_values(JitFrame *frame, JitValueSlot *begin, JitValueSlot *end);
+
+/**
+ * Generate instructions to commit SP and IP pointers to the frame.
+ *
+ * @param frame the frame information
+ */
+void
+gen_commit_sp_ip(JitFrame *frame);
+
+/**
+ * Generate commit instructions for the block end.
+ *
+ * @param frame the frame information
+ */
+static inline void
+gen_commit_for_branch(JitFrame *frame)
+{
+    gen_commit_values(frame, frame->lp, frame->sp);
+}
+
+/**
+ * Generate commit instructions for exception checks.
+ *
+ * @param frame the frame information
+ */
+static inline void
+gen_commit_for_exception(JitFrame *frame)
+{
+    gen_commit_values(frame, frame->lp, frame->lp + frame->max_locals);
+    gen_commit_sp_ip(frame);
+}
+
+/**
+ * Generate commit instructions to commit all status.
+ *
+ * @param frame the frame information
+ */
+static inline void
+gen_commit_for_all(JitFrame *frame)
+{
+    gen_commit_values(frame, frame->lp, frame->sp);
+    gen_commit_sp_ip(frame);
+}
+
+static inline void
+clear_values(JitFrame *frame)
+{
+    size_t total_size =
+        sizeof(JitValueSlot) * (frame->max_locals + frame->max_stacks);
+    memset(frame->lp, 0, total_size);
+    frame->committed_sp = NULL;
+    frame->committed_ip = NULL;
+    clear_fixed_virtual_regs(frame);
+}
+
+static inline void
+push_i32(JitFrame *frame, JitReg value)
+{
+    frame->sp->reg = value;
+    frame->sp->dirty = 1;
+    frame->sp++;
+}
+
+static inline void
+push_i64(JitFrame *frame, JitReg value)
+{
+    frame->sp->reg = value;
+    frame->sp->dirty = 1;
+    frame->sp++;
+    frame->sp->reg = value;
+    frame->sp->dirty = 1;
+    frame->sp++;
+}
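+
+/* An i64/f64 value occupies two 4-byte stack slots, mirroring the
+   interpreter frame layout; both slots record the same JitReg and are
+   marked dirty together so that the commit logic can write the full
+   value back to the frame. */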
+
+static inline void
+push_f32(JitFrame *frame, JitReg value)
+{
+    push_i32(frame, value);
+}
+
+static inline void
+push_f64(JitFrame *frame, JitReg value)
+{
+    push_i64(frame, value);
+}
+
+static inline JitReg
+pop_i32(JitFrame *frame)
+{
+    frame->sp--;
+    return gen_load_i32(frame, frame->sp - frame->lp);
+}
+
+static inline JitReg
+pop_i64(JitFrame *frame)
+{
+    frame->sp -= 2;
+    return gen_load_i64(frame, frame->sp - frame->lp);
+}
+
+static inline JitReg
+pop_f32(JitFrame *frame)
+{
+    frame->sp--;
+    return gen_load_f32(frame, frame->sp - frame->lp);
+}
+
+static inline JitReg
+pop_f64(JitFrame *frame)
+{
+    frame->sp -= 2;
+    return gen_load_f64(frame, frame->sp - frame->lp);
+}
+
+static inline void
+pop(JitFrame *frame, int n)
+{
+    frame->sp -= n;
+    memset(frame->sp, 0, n * sizeof(*frame->sp));
+}
+
+static inline JitReg
+local_i32(JitFrame *frame, int n)
+{
+    return gen_load_i32(frame, n);
+}
+
+static inline JitReg
+local_i64(JitFrame *frame, int n)
+{
+    return gen_load_i64(frame, n);
+}
+
+static inline JitReg
+local_f32(JitFrame *frame, int n)
+{
+    return gen_load_f32(frame, n);
+}
+
+static inline JitReg
+local_f64(JitFrame *frame, int n)
+{
+    return gen_load_f64(frame, n);
+}
+
+static inline void
+set_local_i32(JitFrame *frame, int n, JitReg val)
+{
+    frame->lp[n].reg = val;
+    frame->lp[n].dirty = 1;
+}
+
+static inline void
+set_local_i64(JitFrame *frame, int n, JitReg val)
+{
+    frame->lp[n].reg = val;
+    frame->lp[n].dirty = 1;
+    frame->lp[n + 1].reg = val;
+    frame->lp[n + 1].dirty = 1;
+}
+
+static inline void
+set_local_f32(JitFrame *frame, int n, JitReg val)
+{
+    set_local_i32(frame, n, val);
+}
+
+static inline void
+set_local_f64(JitFrame *frame, int n, JitReg val)
+{
+    set_local_i64(frame, n, val);
+}
+
+#define POP(jit_value, value_type)                         \
+    do {                                                   \
+        if (!jit_cc_pop_value(cc, value_type, &jit_value)) \
+            goto fail;                                     \
+    } while (0)
+
+#define POP_I32(v) POP(v, VALUE_TYPE_I32)
+#define POP_I64(v) POP(v, VALUE_TYPE_I64)
+#define POP_F32(v) POP(v, VALUE_TYPE_F32)
+#define POP_F64(v) POP(v, VALUE_TYPE_F64)
+#define POP_FUNCREF(v) POP(v, VALUE_TYPE_FUNCREF)
+#define POP_EXTERNREF(v) POP(v, VALUE_TYPE_EXTERNREF)
+
+#define PUSH(jit_value, value_type)                        \
+    do {                                                   \
+        if (!jit_cc_push_value(cc, value_type, jit_value)) \
+            goto fail;                                     \
+    } while (0)
+
+#define PUSH_I32(v) PUSH(v, VALUE_TYPE_I32)
+#define PUSH_I64(v) PUSH(v, VALUE_TYPE_I64)
+#define PUSH_F32(v) PUSH(v, VALUE_TYPE_F32)
+#define PUSH_F64(v) PUSH(v, VALUE_TYPE_F64)
+#define PUSH_FUNCREF(v) PUSH(v, VALUE_TYPE_FUNCREF)
+#define PUSH_EXTERNREF(v) PUSH(v, VALUE_TYPE_EXTERNREF)
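+
+/* A typical emitter pattern built from these macros (a sketch only; it
+   assumes a `cc` variable and a `fail` label in scope, plus GEN_INSN and
+   jit_cc_new_reg_I32 from jit_ir.h):
+
+       JitReg lhs, rhs, res;
+       POP_I32(rhs);
+       POP_I32(lhs);
+       res = jit_cc_new_reg_I32(cc);
+       GEN_INSN(ADD, res, lhs, rhs);
+       PUSH_I32(res);
+       return true;
+   fail:
+       return false;
+*/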
+
+#endif

+ 1403 - 0
core/iwasm/fast-jit/jit_ir.c

@@ -0,0 +1,1403 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_ir.h"
+#include "jit_codegen.h"
+#include "jit_frontend.h"
+
+/**
+ * Operand kinds of instructions.
+ */
+enum { JIT_OPND_KIND_Reg, JIT_OPND_KIND_VReg, JIT_OPND_KIND_LookupSwitch };
+
+/**
+ * Operand kind of each instruction.
+ */
+static const uint8 insn_opnd_kind[] = {
+#define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE) JIT_OPND_KIND_##OPND_KIND,
+#include "jit_ir.def"
+#undef INSN
+};
+
+/**
+ * Operand number of each instruction.
+ */
+static const uint8 insn_opnd_num[] = {
+#define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE) OPND_NUM,
+#include "jit_ir.def"
+#undef INSN
+};
+
+/**
+ * Index of the first-use (i.e. non-definition) operand of each instruction.
+ */
+static const uint8 insn_opnd_first_use[] = {
+#define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE) FIRST_USE,
+#include "jit_ir.def"
+#undef INSN
+};
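+
+/* The three tables above are generated from jit_ir.def via the X-macro
+   pattern; an entry there is assumed to look like
+       INSN(ADD, Reg, 3, 1)
+   i.e. opcode name, operand kind, operand count, and the index of the
+   first operand that is a use rather than a definition (operand 0 being
+   the result for most Reg-kind instructions). */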
+
+#define JIT_INSN_NEW_Reg(OPND_NUM) \
+    jit_calloc(offsetof(JitInsn, _opnd) + sizeof(JitReg) * (OPND_NUM))
+#define JIT_INSN_NEW_VReg(OPND_NUM)                     \
+    jit_calloc(offsetof(JitInsn, _opnd._opnd_VReg._reg) \
+               + sizeof(JitReg) * (OPND_NUM))
+
+JitInsn *
+_jit_insn_new_Reg_1(JitOpcode opc, JitReg r0)
+{
+    JitInsn *insn = JIT_INSN_NEW_Reg(1);
+
+    if (insn) {
+        insn->opcode = opc;
+        *jit_insn_opnd(insn, 0) = r0;
+    }
+
+    return insn;
+}
+
+JitInsn *
+_jit_insn_new_Reg_2(JitOpcode opc, JitReg r0, JitReg r1)
+{
+    JitInsn *insn = JIT_INSN_NEW_Reg(2);
+
+    if (insn) {
+        insn->opcode = opc;
+        *jit_insn_opnd(insn, 0) = r0;
+        *jit_insn_opnd(insn, 1) = r1;
+    }
+
+    return insn;
+}
+
+JitInsn *
+_jit_insn_new_Reg_3(JitOpcode opc, JitReg r0, JitReg r1, JitReg r2)
+{
+    JitInsn *insn = JIT_INSN_NEW_Reg(3);
+
+    if (insn) {
+        insn->opcode = opc;
+        *jit_insn_opnd(insn, 0) = r0;
+        *jit_insn_opnd(insn, 1) = r1;
+        *jit_insn_opnd(insn, 2) = r2;
+    }
+
+    return insn;
+}
+
+JitInsn *
+_jit_insn_new_Reg_4(JitOpcode opc, JitReg r0, JitReg r1, JitReg r2, JitReg r3)
+{
+    JitInsn *insn = JIT_INSN_NEW_Reg(4);
+
+    if (insn) {
+        insn->opcode = opc;
+        *jit_insn_opnd(insn, 0) = r0;
+        *jit_insn_opnd(insn, 1) = r1;
+        *jit_insn_opnd(insn, 2) = r2;
+        *jit_insn_opnd(insn, 3) = r3;
+    }
+
+    return insn;
+}
+
+JitInsn *
+_jit_insn_new_Reg_5(JitOpcode opc, JitReg r0, JitReg r1, JitReg r2, JitReg r3,
+                    JitReg r4)
+{
+    JitInsn *insn = JIT_INSN_NEW_Reg(5);
+
+    if (insn) {
+        insn->opcode = opc;
+        *jit_insn_opnd(insn, 0) = r0;
+        *jit_insn_opnd(insn, 1) = r1;
+        *jit_insn_opnd(insn, 2) = r2;
+        *jit_insn_opnd(insn, 3) = r3;
+        *jit_insn_opnd(insn, 4) = r4;
+    }
+
+    return insn;
+}
+
+JitInsn *
+_jit_insn_new_VReg_1(JitOpcode opc, JitReg r0, int n)
+{
+    JitInsn *insn = JIT_INSN_NEW_VReg(1 + n);
+
+    if (insn) {
+        insn->opcode = opc;
+        insn->_opnd._opnd_VReg._reg_num = 1 + n;
+        *(jit_insn_opndv(insn, 0)) = r0;
+    }
+
+    return insn;
+}
+
+JitInsn *
+_jit_insn_new_VReg_2(JitOpcode opc, JitReg r0, JitReg r1, int n)
+{
+    JitInsn *insn = JIT_INSN_NEW_VReg(2 + n);
+
+    if (insn) {
+        insn->opcode = opc;
+        insn->_opnd._opnd_VReg._reg_num = 2 + n;
+        *(jit_insn_opndv(insn, 0)) = r0;
+        *(jit_insn_opndv(insn, 1)) = r1;
+    }
+
+    return insn;
+}
+
+JitInsn *
+_jit_insn_new_LookupSwitch_1(JitOpcode opc, JitReg value, uint32 num)
+{
+    JitOpndLookupSwitch *opnd = NULL;
+    JitInsn *insn =
+        jit_calloc(offsetof(JitInsn, _opnd._opnd_LookupSwitch.match_pairs)
+                   + sizeof(opnd->match_pairs[0]) * num);
+
+    if (insn) {
+        insn->opcode = opc;
+        opnd = jit_insn_opndls(insn);
+        opnd->value = value;
+        opnd->match_pairs_num = num;
+    }
+
+    return insn;
+}
+
+#undef JIT_INSN_NEW_Reg
+#undef JIT_INSN_NEW_VReg
+
+void
+jit_insn_insert_before(JitInsn *insn1, JitInsn *insn2)
+{
+    bh_assert(insn1->prev);
+    insn1->prev->next = insn2;
+    insn2->prev = insn1->prev;
+    insn2->next = insn1;
+    insn1->prev = insn2;
+}
+
+void
+jit_insn_insert_after(JitInsn *insn1, JitInsn *insn2)
+{
+    bh_assert(insn1->next);
+    insn1->next->prev = insn2;
+    insn2->next = insn1->next;
+    insn2->prev = insn1;
+    insn1->next = insn2;
+}
+
+void
+jit_insn_unlink(JitInsn *insn)
+{
+    bh_assert(insn->prev);
+    insn->prev->next = insn->next;
+    bh_assert(insn->next);
+    insn->next->prev = insn->prev;
+    insn->prev = insn->next = NULL;
+}
+
+unsigned
+jit_insn_hash(JitInsn *insn)
+{
+    const uint8 opcode = insn->opcode;
+    unsigned hash = opcode, i;
+
+    /* Currently, only instructions with Reg kind operand require
+       hashing.  For others, simply use opcode as the hash value.  */
+    if (insn_opnd_kind[opcode] != JIT_OPND_KIND_Reg
+        || insn_opnd_num[opcode] < 1)
+        return hash;
+
+    /* All the instructions with hashing support must be in the
+       assignment format, i.e. the first operand is the result (hence
+       being ignored) and all the others are operands.  This is also
+       true for CHK instructions, whose first operand is the instruction
+       pointer.  */
+    for (i = 1; i < insn_opnd_num[opcode]; i++)
+        hash = ((hash << 5) - hash) + *(jit_insn_opnd(insn, i));
+
+    return hash;
+}
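+
+/* The loop above computes the classic multiplicative string hash,
+   hash = hash * 31 + operand, since (hash << 5) - hash == hash * 31;
+   together with jit_insn_equal below, it supports hash-based lookup of
+   equivalent instructions (e.g. for common subexpression elimination). */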
+
+bool
+jit_insn_equal(JitInsn *insn1, JitInsn *insn2)
+{
+    const uint8 opcode = insn1->opcode;
+    unsigned i;
+
+    if (insn2->opcode != opcode)
+        return false;
+
+    if (insn_opnd_kind[opcode] != JIT_OPND_KIND_Reg
+        || insn_opnd_num[opcode] < 1)
+        return false;
+
+    for (i = 1; i < insn_opnd_num[opcode]; i++)
+        if (*(jit_insn_opnd(insn1, i)) != *(jit_insn_opnd(insn2, i)))
+            return false;
+
+    return true;
+}
+
+JitRegVec
+jit_insn_opnd_regs(JitInsn *insn)
+{
+    JitRegVec vec = { 0 };
+    JitOpndLookupSwitch *ls;
+
+    vec._stride = 1;
+
+    switch (insn_opnd_kind[insn->opcode]) {
+        case JIT_OPND_KIND_Reg:
+            vec.num = insn_opnd_num[insn->opcode];
+            vec._base = jit_insn_opnd(insn, 0);
+            break;
+
+        case JIT_OPND_KIND_VReg:
+            vec.num = jit_insn_opndv_num(insn);
+            vec._base = jit_insn_opndv(insn, 0);
+            break;
+
+        case JIT_OPND_KIND_LookupSwitch:
+            ls = jit_insn_opndls(insn);
+            vec.num = ls->match_pairs_num + 2;
+            vec._base = &ls->value;
+            vec._stride = sizeof(ls->match_pairs[0]) / sizeof(*vec._base);
+            break;
+    }
+
+    return vec;
+}
+
+unsigned
+jit_insn_opnd_first_use(JitInsn *insn)
+{
+    return insn_opnd_first_use[insn->opcode];
+}
+
+JitBasicBlock *
+jit_basic_block_new(JitReg label, int n)
+{
+    JitBasicBlock *block = jit_insn_new_PHI(label, n);
+    if (!block)
+        return NULL;
+
+    block->prev = block->next = block;
+    return block;
+}
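+
+/* A basic block is thus represented by its label PHI instruction: the
+   insn's operands carry the label (plus predecessor slots), and the insn
+   itself serves as the sentinel head of a circular doubly-linked list of
+   the block's instructions, which is why prev/next initially point back
+   at the block itself. */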
+
+void
+jit_basic_block_delete(JitBasicBlock *block)
+{
+    JitInsn *insn, *next_insn, *end;
+
+    if (!block)
+        return;
+
+    insn = jit_basic_block_first_insn(block);
+    end = jit_basic_block_end_insn(block);
+
+    for (; insn != end; insn = next_insn) {
+        next_insn = insn->next;
+        jit_insn_delete(insn);
+    }
+
+    jit_insn_delete(block);
+}
+
+JitRegVec
+jit_basic_block_preds(JitBasicBlock *block)
+{
+    JitRegVec vec;
+
+    vec.num = jit_insn_opndv_num(block) - 1;
+    vec._base = vec.num > 0 ? jit_insn_opndv(block, 1) : NULL;
+    vec._stride = 1;
+
+    return vec;
+}
+
+JitRegVec
+jit_basic_block_succs(JitBasicBlock *block)
+{
+    JitInsn *last_insn = jit_basic_block_last_insn(block);
+    JitRegVec vec;
+
+    vec.num = 0;
+    vec._base = NULL;
+    vec._stride = 1;
+
+    switch (last_insn->opcode) {
+        case JIT_OP_JMP:
+            vec.num = 1;
+            vec._base = jit_insn_opnd(last_insn, 0);
+            break;
+
+        case JIT_OP_BEQ:
+        case JIT_OP_BNE:
+        case JIT_OP_BGTS:
+        case JIT_OP_BGES:
+        case JIT_OP_BLTS:
+        case JIT_OP_BLES:
+        case JIT_OP_BGTU:
+        case JIT_OP_BGEU:
+        case JIT_OP_BLTU:
+        case JIT_OP_BLEU:
+            vec.num = 2;
+            vec._base = jit_insn_opnd(last_insn, 1);
+            break;
+
+        case JIT_OP_LOOKUPSWITCH:
+        {
+            JitOpndLookupSwitch *opnd = jit_insn_opndls(last_insn);
+            vec.num = opnd->match_pairs_num + 1;
+            vec._base = &opnd->default_target;
+            vec._stride = sizeof(opnd->match_pairs[0]) / sizeof(*vec._base);
+            break;
+        }
+
+        default:
+            vec._stride = 0;
+    }
+
+    return vec;
+}
+
+JitCompContext *
+jit_cc_init(JitCompContext *cc, unsigned htab_size)
+{
+    JitBasicBlock *entry_block, *exit_block;
+    unsigned i, num;
+
+    memset(cc, 0, sizeof(*cc));
+    cc->_reference_count = 1;
+    jit_annl_enable_basic_block(cc);
+
+    /* Create entry and exit blocks.  They must be the first and
+       second blocks, respectively.  */
+    if (!(entry_block = jit_cc_new_basic_block(cc, 0))
+        || !(exit_block = jit_cc_new_basic_block(cc, 0)))
+        goto fail;
+
+    if (!(cc->exce_basic_blocks =
+              jit_calloc(sizeof(JitBasicBlock *) * JIT_EXCE_NUM)))
+        goto fail;
+
+    if (!(cc->incoming_insns_for_exec_bbs =
+              jit_calloc(sizeof(JitIncomingInsnList) * JIT_EXCE_NUM)))
+        goto fail;
+
+    /* Record the entry and exit labels, whose indexes must be 0 and 1
+       respectively.  */
+    cc->entry_label = jit_basic_block_label(entry_block);
+    cc->exit_label = jit_basic_block_label(exit_block);
+    bh_assert(jit_reg_no(cc->entry_label) == 0
+              && jit_reg_no(cc->exit_label) == 1);
+
+    cc->hreg_info = jit_codegen_get_hreg_info();
+    bh_assert(cc->hreg_info->info[JIT_REG_KIND_I32].num > 3);
+
+    /* Initialize virtual registers for hard registers.  */
+    for (i = JIT_REG_KIND_VOID; i < JIT_REG_KIND_L32; i++) {
+        if ((num = cc->hreg_info->info[i].num)) {
+            /* Initialize the capacity to be large enough.  */
+            jit_cc_new_reg(cc, i);
+            bh_assert(cc->_ann._reg_capacity[i] > num);
+            cc->_ann._reg_num[i] = num;
+        }
+    }
+
+    /* Create registers for frame pointer, exec_env and cmp.  */
+    cc->fp_reg = jit_reg_new(JIT_REG_KIND_PTR, cc->hreg_info->fp_hreg_index);
+    cc->exec_env_reg =
+        jit_reg_new(JIT_REG_KIND_PTR, cc->hreg_info->exec_env_hreg_index);
+    cc->cmp_reg = jit_reg_new(JIT_REG_KIND_I32, cc->hreg_info->cmp_hreg_index);
+
+    cc->_const_val._hash_table_size = htab_size;
+
+    if (!(cc->_const_val._hash_table =
+              jit_calloc(htab_size * sizeof(*cc->_const_val._hash_table))))
+        goto fail;
+
+    return cc;
+
+fail:
+    jit_cc_destroy(cc);
+    return NULL;
+}
+
+void
+jit_cc_destroy(JitCompContext *cc)
+{
+    unsigned i, end;
+    JitBasicBlock *block;
+    JitIncomingInsn *incoming_insn, *incoming_insn_next;
+
+    jit_block_stack_destroy(&cc->block_stack);
+
+    if (cc->jit_frame) {
+        if (cc->jit_frame->memory_regs)
+            jit_free(cc->jit_frame->memory_regs);
+        if (cc->jit_frame->table_regs)
+            jit_free(cc->jit_frame->table_regs);
+        jit_free(cc->jit_frame);
+    }
+
+    if (cc->memory_regs)
+        jit_free(cc->memory_regs);
+
+    if (cc->table_regs)
+        jit_free(cc->table_regs);
+
+    jit_free(cc->_const_val._hash_table);
+
+    /* Release the instruction hash table.  */
+    jit_cc_disable_insn_hash(cc);
+
+    jit_free(cc->exce_basic_blocks);
+
+    if (cc->incoming_insns_for_exec_bbs) {
+        for (i = 0; i < JIT_EXCE_NUM; i++) {
+            incoming_insn = cc->incoming_insns_for_exec_bbs[i];
+            while (incoming_insn) {
+                incoming_insn_next = incoming_insn->next;
+                jit_free(incoming_insn);
+                incoming_insn = incoming_insn_next;
+            }
+        }
+        jit_free(cc->incoming_insns_for_exec_bbs);
+    }
+
+    /* Release entry and exit blocks.  */
+    jit_basic_block_delete(jit_cc_entry_basic_block(cc));
+    jit_basic_block_delete(jit_cc_exit_basic_block(cc));
+
+    /* clang-format off */
+    /* Release blocks and instructions.  */
+    JIT_FOREACH_BLOCK(cc, i, end, block)
+    {
+        jit_basic_block_delete(block);
+    }
+    /* clang-format on */
+
+    /* Release constant values.  */
+    for (i = JIT_REG_KIND_VOID; i < JIT_REG_KIND_L32; i++) {
+        jit_free(cc->_const_val._value[i]);
+        jit_free(cc->_const_val._next[i]);
+    }
+
+    /* Release storage of annotations.  */
+#define ANN_LABEL(TYPE, NAME) jit_annl_disable_##NAME(cc);
+#define ANN_INSN(TYPE, NAME) jit_anni_disable_##NAME(cc);
+#define ANN_REG(TYPE, NAME) jit_annr_disable_##NAME(cc);
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef ANN_INSN
+#undef ANN_REG
+}
+
+void
+jit_cc_delete(JitCompContext *cc)
+{
+    if (cc && --cc->_reference_count == 0) {
+        jit_cc_destroy(cc);
+        jit_free(cc);
+    }
+}
+
+/*
+ * Reallocate a memory block with the new_size.
+ * TODO: replace this with imported jit_realloc when it's available.
+ */
+static void *
+_jit_realloc(void *ptr, unsigned new_size, unsigned old_size)
+{
+    void *new_ptr = jit_malloc(new_size);
+
+    if (new_ptr) {
+        bh_assert(new_size > old_size);
+
+        if (ptr) {
+            memcpy(new_ptr, ptr, old_size);
+            memset((uint8 *)new_ptr + old_size, 0, new_size - old_size);
+            jit_free(ptr);
+        }
+        else
+            memset(new_ptr, 0, new_size);
+    }
+
+    return new_ptr;
+}
+
+static unsigned
+hash_of_const(unsigned kind, unsigned size, void *val)
+{
+    uint8 *p = (uint8 *)val, *end = p + size;
+    unsigned hash = kind;
+
+    do
+        hash = ((hash << 5) - hash) + *p++;
+    while (p != end);
+
+    return hash;
+}
+
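+/* Note that ((hash << 5) - hash) equals hash * 31, the classic
+   multiplicative string hash, applied byte-wise here; jit_insn_hash
+   above applies the same step per operand.  */
+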
+static inline void *
+address_of_const(JitCompContext *cc, JitReg reg, unsigned size)
+{
+    int kind = jit_reg_kind(reg);
+    unsigned no = jit_reg_no(reg);
+    unsigned idx = no & ~_JIT_REG_CONST_IDX_FLAG;
+
+    bh_assert(jit_reg_is_const_idx(reg) && idx < cc->_const_val._num[kind]);
+
+    return cc->_const_val._value[kind] + size * idx;
+}
+
+static inline JitReg
+next_of_const(JitCompContext *cc, JitReg reg)
+{
+    int kind = jit_reg_kind(reg);
+    unsigned no = jit_reg_no(reg);
+    unsigned idx = no & ~_JIT_REG_CONST_IDX_FLAG;
+
+    bh_assert(jit_reg_is_const_idx(reg) && idx < cc->_const_val._num[kind]);
+
+    return cc->_const_val._next[kind][idx];
+}
+
+/**
+ * Put a constant value into the compilation context.
+ *
+ * @param cc compilation context
+ * @param kind register kind
+ * @param size size of the value
+ * @param val pointer to value which must be aligned
+ *
+ * @return a constant register containing the value
+ */
+static JitReg
+_jit_cc_new_const(JitCompContext *cc, int kind, unsigned size, void *val)
+{
+    unsigned num = cc->_const_val._num[kind], slot;
+    unsigned capacity = cc->_const_val._capacity[kind];
+    uint8 *new_value;
+    JitReg r, *new_next;
+
+    bh_assert(num <= capacity);
+
+    /* Find the existing value first.  */
+    slot = hash_of_const(kind, size, val) % cc->_const_val._hash_table_size;
+    r = cc->_const_val._hash_table[slot];
+
+    for (; r; r = next_of_const(cc, r))
+        if (jit_reg_kind(r) == kind
+            && !memcmp(val, address_of_const(cc, r, size), size))
+            return r;
+
+    if (num == capacity) {
+        /* Increase the space of value and next.  */
+        capacity = capacity > 0 ? (capacity + capacity / 2) : 16;
+        new_value = _jit_realloc(cc->_const_val._value[kind], size * capacity,
+                                 size * num);
+        new_next =
+            _jit_realloc(cc->_const_val._next[kind],
+                         sizeof(*new_next) * capacity, sizeof(*new_next) * num);
+
+        if (new_value && new_next) {
+            cc->_const_val._value[kind] = new_value;
+            cc->_const_val._next[kind] = new_next;
+        }
+        else {
+            jit_set_last_error(cc, "create const register failed");
+            jit_free(new_value);
+            jit_free(new_next);
+            return 0;
+        }
+
+        cc->_const_val._capacity[kind] = capacity;
+    }
+
+    bh_assert(num + 1 < (uint32)_JIT_REG_CONST_IDX_FLAG);
+    r = jit_reg_new(kind, _JIT_REG_CONST_IDX_FLAG | num);
+    memcpy(cc->_const_val._value[kind] + size * num, val, size);
+    cc->_const_val._next[kind][num] = cc->_const_val._hash_table[slot];
+    cc->_const_val._hash_table[slot] = r;
+    cc->_const_val._num[kind] = num + 1;
+
+    return r;
+}
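+
+/*
+ * A minimal usage sketch (cc being a placeholder compilation
+ * context): interning means that requesting the same value twice
+ * yields the same constant register, e.g.
+ *
+ *   JitReg a = jit_cc_new_const_I64(cc, 0x123456789LL);
+ *   JitReg b = jit_cc_new_const_I64(cc, 0x123456789LL);
+ *   bh_assert(a == b);                  (found via the hash table)
+ *   bh_assert(jit_reg_is_const_idx(a)); (too wide for the inline form)
+ */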
+
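+/* Decode the signed constant value inlined in a register no.: with a
+   32-bit JitReg, shift is 8 * 4 - 28 + 1 = 5, so the shift pair first
+   drops the 4-bit kind field and the constant value flag, then
+   sign-extends the remaining 27-bit value.  */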
+static inline int32
+get_const_val_in_reg(JitReg reg)
+{
+    int shift = 8 * sizeof(reg) - _JIT_REG_KIND_SHIFT + 1;
+    return ((int32)(reg << shift)) >> shift;
+}
+
+#define _JIT_CC_NEW_CONST_HELPER(KIND, TYPE, val)                             \
+    do {                                                                      \
+        JitReg reg = jit_reg_new(                                             \
+            JIT_REG_KIND_##KIND,                                              \
+            (_JIT_REG_CONST_VAL_FLAG | ((JitReg)val & ~_JIT_REG_KIND_MASK))); \
+                                                                              \
+        if ((TYPE)get_const_val_in_reg(reg) == val)                           \
+            return reg;                                                       \
+        return _jit_cc_new_const(cc, JIT_REG_KIND_##KIND, sizeof(val), &val); \
+    } while (0)
+
+JitReg
+jit_cc_new_const_I32_rel(JitCompContext *cc, int32 val, uint32 rel)
+{
+    uint64 val64 = (uint64)(uint32)val | ((uint64)rel << 32);
+    _JIT_CC_NEW_CONST_HELPER(I32, uint64, val64);
+}
+
+JitReg
+jit_cc_new_const_I64(JitCompContext *cc, int64 val)
+{
+    _JIT_CC_NEW_CONST_HELPER(I64, int64, val);
+}
+
+JitReg
+jit_cc_new_const_F32(JitCompContext *cc, float val)
+{
+    int32 float_neg_zero = 0x80000000;
+
+    if (!memcmp(&val, &float_neg_zero, sizeof(float)))
+        /* Create const -0.0f */
+        return _jit_cc_new_const(cc, JIT_REG_KIND_F32, sizeof(float), &val);
+
+    _JIT_CC_NEW_CONST_HELPER(F32, float, val);
+}
+
+JitReg
+jit_cc_new_const_F64(JitCompContext *cc, double val)
+{
+    int64 double_neg_zero = 0x8000000000000000ll;
+
+    if (!memcmp(&val, &double_neg_zero, sizeof(double)))
+        /* Create const -0.0d */
+        return _jit_cc_new_const(cc, JIT_REG_KIND_F64, sizeof(double), &val);
+
+    _JIT_CC_NEW_CONST_HELPER(F64, double, val);
+}
+
+#undef _JIT_CC_NEW_CONST_HELPER
+
+#define _JIT_CC_GET_CONST_HELPER(KIND, TYPE)                               \
+    do {                                                                   \
+        bh_assert(jit_reg_kind(reg) == JIT_REG_KIND_##KIND);               \
+        bh_assert(jit_reg_is_const(reg));                                  \
+                                                                           \
+        return (jit_reg_is_const_val(reg)                                  \
+                    ? (TYPE)get_const_val_in_reg(reg)                      \
+                    : *(TYPE *)(address_of_const(cc, reg, sizeof(TYPE)))); \
+    } while (0)
+
+static uint64
+jit_cc_get_const_I32_helper(JitCompContext *cc, JitReg reg)
+{
+    _JIT_CC_GET_CONST_HELPER(I32, uint64);
+}
+
+uint32
+jit_cc_get_const_I32_rel(JitCompContext *cc, JitReg reg)
+{
+    return (uint32)(jit_cc_get_const_I32_helper(cc, reg) >> 32);
+}
+
+int32
+jit_cc_get_const_I32(JitCompContext *cc, JitReg reg)
+{
+    return (int32)(jit_cc_get_const_I32_helper(cc, reg));
+}
+
+int64
+jit_cc_get_const_I64(JitCompContext *cc, JitReg reg)
+{
+    _JIT_CC_GET_CONST_HELPER(I64, int64);
+}
+
+float
+jit_cc_get_const_F32(JitCompContext *cc, JitReg reg)
+{
+    _JIT_CC_GET_CONST_HELPER(F32, float);
+}
+
+double
+jit_cc_get_const_F64(JitCompContext *cc, JitReg reg)
+{
+    _JIT_CC_GET_CONST_HELPER(F64, double);
+}
+
+#undef _JIT_CC_GET_CONST_HELPER
+
+#define _JIT_REALLOC_ANN(TYPE, NAME, ANN, POSTFIX)                             \
+    if (successful && cc->_ann._##ANN##_##NAME##_enabled) {                    \
+        TYPE *ptr = _jit_realloc(cc->_ann._##ANN##_##NAME POSTFIX,             \
+                                 sizeof(TYPE) * capacity, sizeof(TYPE) * num); \
+        if (ptr)                                                               \
+            cc->_ann._##ANN##_##NAME POSTFIX = ptr;                            \
+        else                                                                   \
+            successful = false;                                                \
+    }
+
+JitReg
+jit_cc_new_label(JitCompContext *cc)
+{
+    unsigned num = cc->_ann._label_num;
+    unsigned capacity = cc->_ann._label_capacity;
+    bool successful = true;
+
+    bh_assert(num <= capacity);
+
+    if (num == capacity) {
+        capacity = capacity > 0 ? (capacity + capacity / 2) : 16;
+
+#define EMPTY_POSTFIX
+#define ANN_LABEL(TYPE, NAME) _JIT_REALLOC_ANN(TYPE, NAME, label, EMPTY_POSTFIX)
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef EMPTY_POSTFIX
+
+        if (!successful) {
+            jit_set_last_error(cc, "create label register failed");
+            return 0;
+        }
+
+        cc->_ann._label_capacity = capacity;
+    }
+
+    cc->_ann._label_num = num + 1;
+
+    return jit_reg_new(JIT_REG_KIND_L32, num);
+}
+
+JitBasicBlock *
+jit_cc_new_basic_block(JitCompContext *cc, int n)
+{
+    JitReg label = jit_cc_new_label(cc);
+    JitBasicBlock *block = NULL;
+
+    if (label && (block = jit_basic_block_new(label, n)))
+        /* A label of 0 (the void register) indicates an error in
+           creation.  */
+        *(jit_annl_basic_block(cc, label)) = block;
+    else
+        jit_set_last_error(cc, "create basic block failed");
+
+    return block;
+}
+
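+/* Replace the head (PHI) instruction of the given basic block with a
+   new one that has room for n incoming operands, relinking the
+   block's instruction list onto the new head and updating the label's
+   basic_block annotation.  */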
+JitBasicBlock *
+jit_cc_resize_basic_block(JitCompContext *cc, JitBasicBlock *block, int n)
+{
+    JitReg label = jit_basic_block_label(block);
+    JitInsn *insn = jit_basic_block_first_insn(block);
+    JitBasicBlock *new_block = jit_basic_block_new(label, n);
+
+    if (!new_block) {
+        jit_set_last_error(cc, "resize basic block failed");
+        return NULL;
+    }
+
+    jit_insn_unlink(block);
+
+    if (insn != block)
+        jit_insn_insert_before(insn, new_block);
+
+    bh_assert(*(jit_annl_basic_block(cc, label)) == block);
+    *(jit_annl_basic_block(cc, label)) = new_block;
+    jit_insn_delete(block);
+
+    return new_block;
+}
+
+bool
+jit_cc_enable_insn_hash(JitCompContext *cc, unsigned n)
+{
+    if (jit_anni_is_enabled__hash_link(cc))
+        return true;
+
+    if (!jit_anni_enable__hash_link(cc))
+        return false;
+
+    /* The table must not exist.  */
+    bh_assert(!cc->_insn_hash_table._table);
+
+    /* Integer overflow cannot happen because n is far less than 4G
+       (at most a few multiples of 64K, even in the most extreme
+       case).  */
+    if (!(cc->_insn_hash_table._table =
+              jit_calloc(n * sizeof(*cc->_insn_hash_table._table)))) {
+        jit_anni_disable__hash_link(cc);
+        return false;
+    }
+
+    cc->_insn_hash_table._size = n;
+    return true;
+}
+
+void
+jit_cc_disable_insn_hash(JitCompContext *cc)
+{
+    jit_anni_disable__hash_link(cc);
+    jit_free(cc->_insn_hash_table._table);
+    cc->_insn_hash_table._table = NULL;
+    cc->_insn_hash_table._size = 0;
+}
+
+void
+jit_cc_reset_insn_hash(JitCompContext *cc)
+{
+    if (jit_anni_is_enabled__hash_link(cc))
+        memset(cc->_insn_hash_table._table, 0,
+               cc->_insn_hash_table._size
+                   * sizeof(*cc->_insn_hash_table._table));
+}
+
+JitInsn *
+jit_cc_set_insn_uid(JitCompContext *cc, JitInsn *insn)
+{
+    if (insn) {
+        unsigned num = cc->_ann._insn_num;
+        unsigned capacity = cc->_ann._insn_capacity;
+        bool successful = true;
+
+        bh_assert(num <= capacity);
+
+        if (num == capacity) {
+            capacity = capacity > 0 ? (capacity + capacity / 2) : 64;
+
+#define EMPTY_POSTFIX
+#define ANN_INSN(TYPE, NAME) _JIT_REALLOC_ANN(TYPE, NAME, insn, EMPTY_POSTFIX)
+#include "jit_ir.def"
+#undef ANN_INSN
+#undef EMPTY_POSTFIX
+
+            if (!successful) {
+                jit_set_last_error(cc, "set insn uid failed");
+                return NULL;
+            }
+
+            cc->_ann._insn_capacity = capacity;
+        }
+
+        cc->_ann._insn_num = num + 1;
+        insn->uid = num;
+    }
+
+    return insn;
+}
+
+JitInsn *
+_jit_cc_set_insn_uid_for_new_insn(JitCompContext *cc, JitInsn *insn)
+{
+    if (jit_cc_set_insn_uid(cc, insn))
+        return insn;
+
+    jit_insn_delete(insn);
+    return NULL;
+}
+
+JitReg
+jit_cc_new_reg(JitCompContext *cc, unsigned kind)
+{
+    unsigned num = jit_cc_reg_num(cc, kind);
+    unsigned capacity = cc->_ann._reg_capacity[kind];
+    bool successful = true;
+
+    bh_assert(num <= capacity);
+
+    if (num == capacity) {
+        capacity = (capacity == 0
+                        /* Initialize the capacity to be larger than hard
+                           register number.  */
+                        ? cc->hreg_info->info[kind].num + 16
+                        : capacity + capacity / 2);
+
+#define ANN_REG(TYPE, NAME) _JIT_REALLOC_ANN(TYPE, NAME, reg, [kind])
+#include "jit_ir.def"
+#undef ANN_REG
+
+        if (!successful) {
+            jit_set_last_error(cc, "create register failed");
+            return 0;
+        }
+
+        cc->_ann._reg_capacity[kind] = capacity;
+    }
+
+    cc->_ann._reg_num[kind] = num + 1;
+
+    return jit_reg_new(kind, num);
+}
+
+#undef _JIT_REALLOC_ANN
+
+#define ANN_LABEL(TYPE, NAME)                                                \
+    bool jit_annl_enable_##NAME(JitCompContext *cc)                          \
+    {                                                                        \
+        if (cc->_ann._label_##NAME##_enabled)                                \
+            return true;                                                     \
+                                                                             \
+        if (cc->_ann._label_capacity > 0                                     \
+            && !(cc->_ann._label_##NAME =                                    \
+                     jit_calloc(cc->_ann._label_capacity * sizeof(TYPE)))) { \
+            jit_set_last_error(cc, "annl enable " #NAME "failed");           \
+            return false;                                                    \
+        }                                                                    \
+                                                                             \
+        cc->_ann._label_##NAME##_enabled = 1;                                \
+        return true;                                                         \
+    }
+#define ANN_INSN(TYPE, NAME)                                                \
+    bool jit_anni_enable_##NAME(JitCompContext *cc)                         \
+    {                                                                       \
+        if (cc->_ann._insn_##NAME##_enabled)                                \
+            return true;                                                    \
+                                                                            \
+        if (cc->_ann._insn_capacity > 0                                     \
+            && !(cc->_ann._insn_##NAME =                                    \
+                     jit_calloc(cc->_ann._insn_capacity * sizeof(TYPE)))) { \
+            jit_set_last_error(cc, "anni enable " #NAME "failed");          \
+            return false;                                                   \
+        }                                                                   \
+                                                                            \
+        cc->_ann._insn_##NAME##_enabled = 1;                                \
+        return true;                                                        \
+    }
+#define ANN_REG(TYPE, NAME)                                            \
+    bool jit_annr_enable_##NAME(JitCompContext *cc)                    \
+    {                                                                  \
+        unsigned k;                                                    \
+                                                                       \
+        if (cc->_ann._reg_##NAME##_enabled)                            \
+            return true;                                               \
+                                                                       \
+        for (k = JIT_REG_KIND_VOID; k < JIT_REG_KIND_L32; k++)         \
+            if (cc->_ann._reg_capacity[k] > 0                          \
+                && !(cc->_ann._reg_##NAME[k] = jit_calloc(             \
+                         cc->_ann._reg_capacity[k] * sizeof(TYPE)))) { \
+                jit_set_last_error(cc, "annr enable " #NAME "failed"); \
+                jit_annr_disable_##NAME(cc);                           \
+                return false;                                          \
+            }                                                          \
+                                                                       \
+        cc->_ann._reg_##NAME##_enabled = 1;                            \
+        return true;                                                   \
+    }
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef ANN_INSN
+#undef ANN_REG
+
+#define ANN_LABEL(TYPE, NAME)                        \
+    void jit_annl_disable_##NAME(JitCompContext *cc) \
+    {                                                \
+        jit_free(cc->_ann._label_##NAME);            \
+        cc->_ann._label_##NAME = NULL;               \
+        cc->_ann._label_##NAME##_enabled = 0;        \
+    }
+#define ANN_INSN(TYPE, NAME)                         \
+    void jit_anni_disable_##NAME(JitCompContext *cc) \
+    {                                                \
+        jit_free(cc->_ann._insn_##NAME);             \
+        cc->_ann._insn_##NAME = NULL;                \
+        cc->_ann._insn_##NAME##_enabled = 0;         \
+    }
+#define ANN_REG(TYPE, NAME)                                      \
+    void jit_annr_disable_##NAME(JitCompContext *cc)             \
+    {                                                            \
+        unsigned k;                                              \
+                                                                 \
+        for (k = JIT_REG_KIND_VOID; k < JIT_REG_KIND_L32; k++) { \
+            jit_free(cc->_ann._reg_##NAME[k]);                   \
+            cc->_ann._reg_##NAME[k] = NULL;                      \
+        }                                                        \
+                                                                 \
+        cc->_ann._reg_##NAME##_enabled = 0;                      \
+    }
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef ANN_INSN
+#undef ANN_REG
+
+char *
+jit_get_last_error(JitCompContext *cc)
+{
+    return cc->last_error[0] == '\0' ? NULL : cc->last_error;
+}
+
+void
+jit_set_last_error_v(JitCompContext *cc, const char *format, ...)
+{
+    va_list args;
+    va_start(args, format);
+    vsnprintf(cc->last_error, sizeof(cc->last_error), format, args);
+    va_end(args);
+}
+
+void
+jit_set_last_error(JitCompContext *cc, const char *error)
+{
+    if (error)
+        snprintf(cc->last_error, sizeof(cc->last_error), "Error: %s", error);
+    else
+        cc->last_error[0] = '\0';
+}
+
+bool
+jit_cc_update_cfg(JitCompContext *cc)
+{
+    JitBasicBlock *block;
+    unsigned block_index, end, succ_index, idx;
+    JitReg *target;
+    bool retval = false;
+
+    if (!jit_annl_enable_pred_num(cc))
+        return false;
+
+    /* Update pred_num of all blocks.  */
+    JIT_FOREACH_BLOCK_ENTRY_EXIT(cc, block_index, end, block)
+    {
+        JitRegVec succs = jit_basic_block_succs(block);
+
+        JIT_REG_VEC_FOREACH(succs, succ_index, target)
+        if (jit_reg_is_kind(L32, *target))
+            *(jit_annl_pred_num(cc, *target)) += 1;
+    }
+
+    /* Resize predecessor vectors of body blocks.  */
+    JIT_FOREACH_BLOCK(cc, block_index, end, block)
+    {
+        if (!jit_cc_resize_basic_block(
+                cc, block,
+                *(jit_annl_pred_num(cc, jit_basic_block_label(block)))))
+            goto cleanup_and_return;
+    }
+
+    /* Fill in the predecessor vectors of all blocks.  */
+    JIT_FOREACH_BLOCK_REVERSE_ENTRY_EXIT(cc, block_index, block)
+    {
+        JitRegVec succs = jit_basic_block_succs(block), preds;
+
+        JIT_REG_VEC_FOREACH(succs, succ_index, target)
+        if (jit_reg_is_kind(L32, *target)) {
+            preds = jit_basic_block_preds(*(jit_annl_basic_block(cc, *target)));
+            bh_assert(*(jit_annl_pred_num(cc, *target)) > 0);
+            idx = *(jit_annl_pred_num(cc, *target)) - 1;
+            *(jit_annl_pred_num(cc, *target)) = idx;
+            *(jit_reg_vec_at(&preds, idx)) = jit_basic_block_label(block);
+        }
+    }
+
+    retval = true;
+
+cleanup_and_return:
+    jit_annl_disable_pred_num(cc);
+    return retval;
+}
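+
+/*
+ * Note on the passes above: the pred_num annotation does double duty.
+ * The first pass uses it as a counter of incoming edges, and the last
+ * (reverse) pass uses it as a decrementing write cursor while filling
+ * each successor's predecessor vector, which is why it can be
+ * disabled again once the CFG has been rebuilt.
+ */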
+
+void
+jit_value_stack_push(JitValueStack *stack, JitValue *value)
+{
+    if (!stack->value_list_head)
+        stack->value_list_head = stack->value_list_end = value;
+    else {
+        stack->value_list_end->next = value;
+        value->prev = stack->value_list_end;
+        stack->value_list_end = value;
+    }
+}
+
+JitValue *
+jit_value_stack_pop(JitValueStack *stack)
+{
+    JitValue *value = stack->value_list_end;
+
+    bh_assert(stack->value_list_end);
+
+    if (stack->value_list_head == stack->value_list_end)
+        stack->value_list_head = stack->value_list_end = NULL;
+    else {
+        stack->value_list_end = stack->value_list_end->prev;
+        stack->value_list_end->next = NULL;
+        value->prev = NULL;
+    }
+
+    return value;
+}
+
+void
+jit_value_stack_destroy(JitValueStack *stack)
+{
+    JitValue *value = stack->value_list_head, *p;
+
+    while (value) {
+        p = value->next;
+        jit_free(value);
+        value = p;
+    }
+
+    stack->value_list_head = NULL;
+    stack->value_list_end = NULL;
+}
+
+void
+jit_block_stack_push(JitBlockStack *stack, JitBlock *block)
+{
+    if (!stack->block_list_head)
+        stack->block_list_head = stack->block_list_end = block;
+    else {
+        stack->block_list_end->next = block;
+        block->prev = stack->block_list_end;
+        stack->block_list_end = block;
+    }
+}
+
+JitBlock *
+jit_block_stack_top(JitBlockStack *stack)
+{
+    return stack->block_list_end;
+}
+
+JitBlock *
+jit_block_stack_pop(JitBlockStack *stack)
+{
+    JitBlock *block = stack->block_list_end;
+
+    bh_assert(stack->block_list_end);
+
+    if (stack->block_list_head == stack->block_list_end)
+        stack->block_list_head = stack->block_list_end = NULL;
+    else {
+        stack->block_list_end = stack->block_list_end->prev;
+        stack->block_list_end->next = NULL;
+        block->prev = NULL;
+    }
+
+    return block;
+}
+
+void
+jit_block_stack_destroy(JitBlockStack *stack)
+{
+    JitBlock *block = stack->block_list_head, *p;
+
+    while (block) {
+        p = block->next;
+        jit_value_stack_destroy(&block->value_stack);
+        jit_block_destroy(block);
+        block = p;
+    }
+
+    stack->block_list_head = NULL;
+    stack->block_list_end = NULL;
+}
+
+bool
+jit_block_add_incoming_insn(JitBlock *block, JitInsn *insn, uint32 opnd_idx)
+{
+    JitIncomingInsn *incoming_insn;
+
+    if (!(incoming_insn = jit_calloc((uint32)sizeof(JitIncomingInsn))))
+        return false;
+
+    incoming_insn->insn = insn;
+    incoming_insn->opnd_idx = opnd_idx;
+    incoming_insn->next = block->incoming_insns_for_end_bb;
+    block->incoming_insns_for_end_bb = incoming_insn;
+    return true;
+}
+
+void
+jit_block_destroy(JitBlock *block)
+{
+    JitIncomingInsn *incoming_insn, *incoming_insn_next;
+
+    jit_value_stack_destroy(&block->value_stack);
+    if (block->param_types)
+        jit_free(block->param_types);
+    if (block->result_types)
+        jit_free(block->result_types);
+
+    incoming_insn = block->incoming_insns_for_end_bb;
+    while (incoming_insn) {
+        incoming_insn_next = incoming_insn->next;
+        jit_free(incoming_insn);
+        incoming_insn = incoming_insn_next;
+    }
+
+    jit_free(block);
+}
+
+static inline uint8
+to_stack_value_type(uint8 type)
+{
+#if WASM_ENABLE_REF_TYPES != 0
+    if (type == VALUE_TYPE_EXTERNREF || type == VALUE_TYPE_FUNCREF)
+        return VALUE_TYPE_I32;
+#endif
+    return type;
+}
+
+bool
+jit_cc_pop_value(JitCompContext *cc, uint8 type, JitReg *p_value)
+{
+    JitValue *jit_value = NULL;
+    JitReg value = 0;
+
+    if (!jit_block_stack_top(&cc->block_stack)) {
+        jit_set_last_error(cc, "WASM block stack underflow");
+        return false;
+    }
+    if (!jit_block_stack_top(&cc->block_stack)->value_stack.value_list_end) {
+        jit_set_last_error(cc, "WASM data stack underflow");
+        return false;
+    }
+
+    jit_value = jit_value_stack_pop(
+        &jit_block_stack_top(&cc->block_stack)->value_stack);
+    bh_assert(jit_value);
+
+    if (jit_value->type != to_stack_value_type(type)) {
+        jit_set_last_error(cc, "invalid WASM stack data type");
+        jit_free(jit_value);
+        return false;
+    }
+
+    switch (jit_value->type) {
+        case VALUE_TYPE_I32:
+            value = pop_i32(cc->jit_frame);
+            break;
+        case VALUE_TYPE_I64:
+            value = pop_i64(cc->jit_frame);
+            break;
+        case VALUE_TYPE_F32:
+            value = pop_f32(cc->jit_frame);
+            break;
+        case VALUE_TYPE_F64:
+            value = pop_f64(cc->jit_frame);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    bh_assert(cc->jit_frame->sp == jit_value->value);
+    bh_assert(value == jit_value->value->reg);
+    *p_value = value;
+    jit_free(jit_value);
+    return true;
+}
+
+bool
+jit_cc_push_value(JitCompContext *cc, uint8 type, JitReg value)
+{
+    JitValue *jit_value;
+
+    if (!jit_block_stack_top(&cc->block_stack)) {
+        jit_set_last_error(cc, "WASM block stack underflow");
+        return false;
+    }
+
+    if (!(jit_value = jit_calloc(sizeof(JitValue)))) {
+        jit_set_last_error(cc, "allocate memory failed");
+        return false;
+    }
+
+    bh_assert(value);
+
+    jit_value->type = to_stack_value_type(type);
+    jit_value->value = cc->jit_frame->sp;
+    jit_value_stack_push(&jit_block_stack_top(&cc->block_stack)->value_stack,
+                         jit_value);
+
+    switch (jit_value->type) {
+        case VALUE_TYPE_I32:
+            push_i32(cc->jit_frame, value);
+            break;
+        case VALUE_TYPE_I64:
+            push_i64(cc->jit_frame, value);
+            break;
+        case VALUE_TYPE_F32:
+            push_f32(cc->jit_frame, value);
+            break;
+        case VALUE_TYPE_F64:
+            push_f64(cc->jit_frame, value);
+            break;
+    }
+
+    return true;
+}
+
+bool
+_jit_insn_check_opnd_access_Reg(const JitInsn *insn, unsigned n)
+{
+    unsigned opcode = insn->opcode;
+    return (insn_opnd_kind[opcode] == JIT_OPND_KIND_Reg
+            && n < insn_opnd_num[opcode]);
+}
+
+bool
+_jit_insn_check_opnd_access_VReg(const JitInsn *insn, unsigned n)
+{
+    unsigned opcode = insn->opcode;
+    return (insn_opnd_kind[opcode] == JIT_OPND_KIND_VReg
+            && n < insn->_opnd._opnd_VReg._reg_num);
+}
+
+bool
+_jit_insn_check_opnd_access_LookupSwitch(const JitInsn *insn)
+{
+    unsigned opcode = insn->opcode;
+    return (insn_opnd_kind[opcode] == JIT_OPND_KIND_LookupSwitch);
+}
+
+bool
+jit_lock_reg_in_insn(JitCompContext *cc, JitInsn *the_insn, JitReg reg_to_lock)
+{
+    bool ret = false;
+    JitInsn *prevent_spill = NULL;
+    JitInsn *indicate_using = NULL;
+
+    if (!the_insn)
+        goto just_return;
+
+    if (jit_cc_is_hreg_fixed(cc, reg_to_lock)) {
+        ret = true;
+        goto just_return;
+    }
+
+    /**
+     * Give the virtual register of the locked hard register a minimum,
+     * non-zero distance, so as to prevent it from being spilled out.
+     */
+    prevent_spill = jit_insn_new_MOV(reg_to_lock, reg_to_lock);
+    if (!prevent_spill)
+        goto just_return;
+
+    jit_insn_insert_before(the_insn, prevent_spill);
+
+    /**
+     * Announce that the locked hard register is being used, and do any
+     * necessary spill ASAP.
+     */
+    indicate_using = jit_insn_new_MOV(reg_to_lock, reg_to_lock);
+    if (!indicate_using)
+        goto just_return;
+
+    jit_insn_insert_after(the_insn, indicate_using);
+
+    ret = true;
+
+just_return:
+    if (!ret)
+        jit_set_last_error(cc, "generate insn failed");
+    return ret;
+}
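+
+/*
+ * Resulting shape around the locked instruction (a sketch; the exact
+ * spill decisions are up to the register allocator):
+ *
+ *   MOV reg_to_lock, reg_to_lock   (keeps the register live before)
+ *   the_insn ...
+ *   MOV reg_to_lock, reg_to_lock   (announces the use right after)
+ */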

+ 302 - 0
core/iwasm/fast-jit/jit_ir.def

@@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+/**
+ * @file   jit_ir.def
+ *
+ * @brief  Definition of JIT IR instructions and annotations.
+ */
+
+/**
+ * @def INSN (NAME, OPND_KIND, OPND_NUM, FIRST_USE)
+ *
+ * Definition of IR instructions
+ *
+ * @param NAME name of the opcode
+ * @param OPND_KIND kind of the operand(s)
+ * @param OPND_NUM number of the operand(s)
+ * @param FIRST_USE index of the first use register
+ *
+ * @p OPND_KIND and @p OPND_NUM together determine the format of an
+ * instruction.  There are four kinds of formats:
+ *
+ * 1) Reg: fixed-number register operands, @p OPND_NUM specifies the
+ * number of operands;
+ *
+ * 2) VReg: variable-number register operands, @p OPND_NUM specifies
+ * the number of fixed register operands;
+ *
+ * 3) TableSwitch: tableswitch instruction's format, @p OPND_NUM must
+ * be 1;
+ *
+ * 4) LookupSwitch: lookupswitch instruction's format, @p OPND_NUM
+ * must be 1.
+ *
+ * Instruction operands are all registers, and they are organized in
+ * such an order that all registers defined by the instruction, if
+ * any, appear before the registers used by it.  The @p FIRST_USE is
+ * the index of the first use register in the register vector sorted
+ * in this order. Use @c jit_insn_opnd_regs to get the register
+ * vector in this order and use @c jit_insn_opnd_first_use to get the
+ * index of the first use register.
+ *
+ * Every instruction with name @p NAME has the following definitions:
+ *
+ * @c JIT_OP_NAME: the enum opcode of insn NAME
+ * @c jit_insn_new_NAME (...): creates a new instance of insn NAME
+ *
+ * An instruction is deleted by function:
+ *
+ * @c jit_insn_delete (@p insn)
+ *
+ * In the scope of this IR's terminology, operand and argument have
+ * different meanings. The operand is a general notation, which
+ * denotes every raw operand of an instruction, while the argument
+ * only denotes the variable part of operands of instructions of VReg
+ * kind. For example, a VReg instruction phi node "r0 = phi(r1, r2)"
+ * has three operands opnd[0]: r0, opnd[1]: r1 and opnd[2]: r2, but
+ * only two arguments arg[0]: r1 and arg[1]: r2.  Operands or
+ * arguments of instructions with various formats can be access
+ * through the following APIs:
+ *
+ * @c jit_insn_opnd (@p insn, @p n): for Reg_N formats
+ * @c jit_insn_opndv (@p insn, @p n): for VReg_N formats
+ * @c jit_insn_opndv_num (@p insn): for VReg_N formats
+ * @c jit_insn_opndts (@p insn): for TableSwitch_1 format
+ * @c jit_insn_opndls (@p insn): for LookupSwitch_1 format
+ */
+
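+/*
+ * For example, for the phi node "r0 = phi(r1, r2)" mentioned above,
+ * built with jit_insn_new_PHI(r0, 2) (r0..r2 being placeholder
+ * registers):
+ *
+ *   jit_insn_opndv_num(insn)  returns 3
+ *   jit_insn_opndv(insn, 0)   points to the result r0
+ *   jit_insn_opndv(insn, 1)   points to arg[0], i.e. r1 once set
+ *   jit_insn_opndv(insn, 2)   points to arg[1], i.e. r2 once set
+ */
+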
+#ifndef INSN
+#define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE)
+#endif
+
+/* Move and conversion instructions that transfer values among
+   registers of the same kind (move) or different kinds (convert) */
+INSN(MOV, Reg, 2, 1)
+INSN(PHI, VReg, 1, 1)
+
+/* conversion. will extend or truncate */
+INSN(I8TOI32, Reg, 2, 1)
+INSN(I8TOI64, Reg, 2, 1)
+INSN(I16TOI32, Reg, 2, 1)
+INSN(I16TOI64, Reg, 2, 1)
+INSN(I32TOI8, Reg, 2, 1)
+INSN(I32TOU8, Reg, 2, 1)
+INSN(I32TOI16, Reg, 2, 1)
+INSN(I32TOU16, Reg, 2, 1)
+INSN(I32TOI64, Reg, 2, 1)
+INSN(I32TOF32, Reg, 2, 1)
+INSN(I32TOF64, Reg, 2, 1)
+INSN(U32TOI64, Reg, 2, 1)
+INSN(U32TOF32, Reg, 2, 1)
+INSN(U32TOF64, Reg, 2, 1)
+INSN(I64TOI8, Reg, 2, 1)
+INSN(I64TOI16, Reg, 2, 1)
+INSN(I64TOI32, Reg, 2, 1)
+INSN(I64TOF32, Reg, 2, 1)
+INSN(I64TOF64, Reg, 2, 1)
+INSN(F32TOI32, Reg, 2, 1)
+INSN(F32TOI64, Reg, 2, 1)
+INSN(F32TOF64, Reg, 2, 1)
+INSN(F32TOU32, Reg, 2, 1)
+INSN(F64TOI32, Reg, 2, 1)
+INSN(F64TOI64, Reg, 2, 1)
+INSN(F64TOF32, Reg, 2, 1)
+INSN(F64TOU32, Reg, 2, 1)
+
+/**
+ * Re-interpret binary representations:
+ *   *(i32 *)&f32, *(i64 *)&f64, *(f32 *)&i32, *(f64 *)&i64
+ */
+INSN(I32CASTF32, Reg, 2, 1)
+INSN(I64CASTF64, Reg, 2, 1)
+INSN(F32CASTI32, Reg, 2, 1)
+INSN(F64CASTI64, Reg, 2, 1)
+
+/* Arithmetic and bitwise instructions: */
+INSN(NEG, Reg, 2, 1)
+INSN(NOT, Reg, 2, 1)
+INSN(ADD, Reg, 3, 1)
+INSN(SUB, Reg, 3, 1)
+INSN(MUL, Reg, 3, 1)
+INSN(DIV_S, Reg, 3, 1)
+INSN(REM_S, Reg, 3, 1)
+INSN(DIV_U, Reg, 3, 1)
+INSN(REM_U, Reg, 3, 1)
+INSN(SHL, Reg, 3, 1)
+INSN(SHRS, Reg, 3, 1)
+INSN(SHRU, Reg, 3, 1)
+INSN(ROTL, Reg, 3, 1)
+INSN(ROTR, Reg, 3, 1)
+INSN(OR, Reg, 3, 1)
+INSN(XOR, Reg, 3, 1)
+INSN(AND, Reg, 3, 1)
+INSN(CMP, Reg, 3, 1)
+INSN(MAX, Reg, 3, 1)
+INSN(MIN, Reg, 3, 1)
+INSN(CLZ, Reg, 2, 1)
+INSN(CTZ, Reg, 2, 1)
+INSN(POPCNT, Reg, 2, 1)
+
+/* Select instruction: */
+INSN(SELECTEQ, Reg, 4, 1)
+INSN(SELECTNE, Reg, 4, 1)
+INSN(SELECTGTS, Reg, 4, 1)
+INSN(SELECTGES, Reg, 4, 1)
+INSN(SELECTLTS, Reg, 4, 1)
+INSN(SELECTLES, Reg, 4, 1)
+INSN(SELECTGTU, Reg, 4, 1)
+INSN(SELECTGEU, Reg, 4, 1)
+INSN(SELECTLTU, Reg, 4, 1)
+INSN(SELECTLEU, Reg, 4, 1)
+
+/* Memory access instructions: */
+INSN(LDEXECENV, Reg, 1, 1)
+INSN(LDJITINFO, Reg, 1, 1)
+INSN(LDI8, Reg, 3, 1)
+INSN(LDU8, Reg, 3, 1)
+INSN(LDI16, Reg, 3, 1)
+INSN(LDU16, Reg, 3, 1)
+INSN(LDI32, Reg, 3, 1)
+INSN(LDU32, Reg, 3, 1)
+INSN(LDI64, Reg, 3, 1)
+INSN(LDU64, Reg, 3, 1)
+INSN(LDF32, Reg, 3, 1)
+INSN(LDF64, Reg, 3, 1)
+INSN(LDPTR, Reg, 3, 1)
+INSN(LDV64, Reg, 3, 1)
+INSN(LDV128, Reg, 3, 1)
+INSN(LDV256, Reg, 3, 1)
+INSN(STI8, Reg, 3, 0)
+INSN(STI16, Reg, 3, 0)
+INSN(STI32, Reg, 3, 0)
+INSN(STI64, Reg, 3, 0)
+INSN(STF32, Reg, 3, 0)
+INSN(STF64, Reg, 3, 0)
+INSN(STPTR, Reg, 3, 0)
+INSN(STV64, Reg, 3, 1)
+INSN(STV128, Reg, 3, 1)
+INSN(STV256, Reg, 3, 1)
+
+/* Control instructions */
+INSN(JMP, Reg, 1, 0)
+INSN(BEQ, Reg, 3, 0)
+INSN(BNE, Reg, 3, 0)
+INSN(BGTS, Reg, 3, 0)
+INSN(BGES, Reg, 3, 0)
+INSN(BLTS, Reg, 3, 0)
+INSN(BLES, Reg, 3, 0)
+INSN(BGTU, Reg, 3, 0)
+INSN(BGEU, Reg, 3, 0)
+INSN(BLTU, Reg, 3, 0)
+INSN(BLEU, Reg, 3, 0)
+INSN(LOOKUPSWITCH, LookupSwitch, 1, 0)
+
+/* Call and return instructions */
+INSN(CALLNATIVE, VReg, 2, 1)
+INSN(CALLBC, Reg, 3, 2)
+INSN(RETURNBC, Reg, 3, 0)
+INSN(RETURN, Reg, 1, 0)
+
+#undef INSN
+
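+/*
+ * As a concrete illustration, the entry INSN(MOV, Reg, 2, 1) above
+ * expands (via jit_ir.h) into the enum value JIT_OP_MOV and the
+ * constructor jit_insn_new_MOV(JitReg r0, JitReg r1), where operand 0
+ * is the result and operand 1 (FIRST_USE) is the source.
+ */
+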
+/**
+ * @def ANN_LABEL (TYPE, NAME)
+ *
+ * Definition of label annotations.
+ *
+ * @param TYPE type of the annotation
+ * @param NAME name of the annotation
+ *
+ * Each defined annotation with name NAME has the following APIs:
+ *
+ * @c jit_annl_NAME (cc, label): accesses the annotation NAME of
+ * label @p label
+ * @c jit_annl_enable_NAME (cc): enables the annotation NAME
+ * @c jit_annl_disable_NAME (cc): disables the annotation NAME
+ * @c jit_annl_is_enabled_NAME (cc): check whether the annotation NAME
+ * is enabled
+ */
+
+#ifndef ANN_LABEL
+#define ANN_LABEL(TYPE, NAME)
+#endif
+
+/* Basic Block of a label.  */
+ANN_LABEL(JitBasicBlock *, basic_block)
+/* Predecessor number of the block that is only used in
+   jit_cc_update_cfg for updating the CFG.  */
+ANN_LABEL(uint16, pred_num)
+/* Execution frequency of a block.  We can split critical edges with
+   empty blocks so we don't need to store frequencies of edges.  */
+ANN_LABEL(uint16, freq)
+/* Begin bytecode instruction pointer of the block.  */
+ANN_LABEL(uint8 *, begin_bcip)
+/* End bytecode instruction pointer of the block.  */
+ANN_LABEL(uint8 *, end_bcip)
+/* Stack pointer offset at the end of the block.  */
+ANN_LABEL(uint16, end_sp)
+/* The label of the next physically adjacent block.  */
+ANN_LABEL(JitReg, next_label)
+/* Compiled code address of the block.  */
+ANN_LABEL(void *, jitted_addr)
+
+#undef ANN_LABEL
+
+/**
+ * @def ANN_INSN (TYPE, NAME)
+ *
+ * Definition of instruction annotations.
+ *
+ * @param TYPE type of the annotation
+ * @param NAME name of the annotation
+ *
+ * Each defined annotation with name NAME has the following APIs:
+ *
+ * @c jit_anni_NAME (cc, insn): accesses the annotation NAME of
+ * instruction @p insn
+ * @c jit_anni_enable_NAME (cc): enables the annotation NAME
+ * @c jit_anni_disable_NAME (cc): disables the annotation NAME
+ * @c jit_anni_is_enabled_NAME (cc): check whether the annotation NAME
+ * is enabled
+ */
+
+#ifndef ANN_INSN
+#define ANN_INSN(TYPE, NAME)
+#endif
+
+/* A private annotation for linking instructions with the same hash
+   value, which is only used by the compilation context's hash table
+   of instructions.  */
+ANN_INSN(JitInsn *, _hash_link)
+
+#undef ANN_INSN
+
+/**
+ * @def ANN_REG (TYPE, NAME)
+ *
+ * Definition of register annotations.
+ *
+ * @param TYPE type of the annotation
+ * @param NAME name of the annotation
+ *
+ * Each defined annotation with name NAME has the following APIs:
+ *
+ * @c jit_annr_NAME (cc, reg): accesses the annotation NAME of
+ * register @p reg
+ * @c jit_annr_enable_NAME (cc): enables the annotation NAME
+ * @c jit_annr_disable_NAME (cc): disables the annotation NAME
+ * @c jit_annr_is_enabled_NAME (cc): check whether the annotation NAME
+ * is enabled
+ */
+
+#ifndef ANN_REG
+#define ANN_REG(TYPE, NAME)
+#endif
+
+/* Defining instruction of registers satisfying SSA property.  */
+ANN_REG(JitInsn *, def_insn)
+
+#undef ANN_REG

+ 1874 - 0
core/iwasm/fast-jit/jit_ir.h

@@ -0,0 +1,1874 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_IR_H_
+#define _JIT_IR_H_
+
+#include "bh_platform.h"
+#include "../interpreter/wasm.h"
+#include "jit_utils.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Register (operand) representation of JIT IR.
+ *
+ * Encoding: [4-bit: kind, 28-bit register no.]
+ *
+ * Registers in JIT IR are classified into different kinds according
+ * to types of values they can hold. The classification is based on
+ * most processors' hardware register classifications, which include
+ * various sets of integer, floating point and vector registers with
+ * different sizes. These registers can be mapped onto corresponding
+ * kinds of hardware registers by register allocator. Instructions
+ * can only operate on allowed kinds of registers. For example, an
+ * integer instruction cannot operate on floating point or vector
+ * registers. Some encodings of these kinds of registers also
+ * represent immediate constant values and indexes to constant tables
+ * (see below). In that case, those registers are read-only. Writing
+ * to them is illegal. Reading from an immediate constant value
+ * register always returns the constant value encoded in the register
+ * no. Reading from a constant table index register always returns
+ * the constant value stored at the encoded index of the constant
+ * table of the register's kind. Immediate constant values and values
+ * indexed by constant table indexes can only be loaded into the
+ * corresponding kinds of registers if they must be loaded into
+ * registers. Besides these common kinds of registers, labels of
+ * basic blocks are also treated as registers of a special kind, which
+ * hold code addresses of basic block labels and are read-only. Each
+ * basic block is assigned one unique label register. With this
+ * unification, we can use the same set of load instructions to load
+ * values either from addresses stored in normal registers or from
+ * addresses of labels. Besides these register kinds, the void kind
+ * is a special kind of registers to denote some error occurs when a
+ * normal register is expected. Or it can be used as result operand
+ * of call and invoke instructions to denote no return values. The
+ * variable registers are classified into two sets: the hard registers
+ * whose register numbers are less than the hard register numbers of
+ * their kinds and the virtual registers whose register numbers are
+ * greater than or equal to the hard register numbers. Before
+ * register allocation is done, hard registers may appear in the IR
+ * due to special usages of passes, e.g. fp_reg and exec_env_reg in
+ * the frontend, or in lower_cg.  In the meantime (including during
+ * register allocation), those hard registers are treated the same
+ * as virtual registers, except that they may not be SSA and they
+ * can only be allocated to the hard registers of themselves.
+ *
+ * Classification of registers:
+ *   + void register (kind == JIT_REG_KIND_VOID, no. must be 0)
+ *   + label registers (kind == JIT_REG_KIND_L32)
+ *   + value registers (kind == JIT_REG_KIND_I32/I64/F32/F64/V64/V128/V256)
+ *   | + constants (_JIT_REG_CONST_VAL_FLAG | _JIT_REG_CONST_IDX_FLAG)
+ *   | | + constant values (_JIT_REG_CONST_VAL_FLAG)
+ *   | | + constant indexes (_JIT_REG_CONST_IDX_FLAG)
+ *   | + variables (!(_JIT_REG_CONST_VAL_FLAG | _JIT_REG_CONST_IDX_FLAG))
+ *   | | + hard registers (no. < hard register number)
+ *   | | + virtual registers (no. >= hard register number)
+ */
+typedef uint32 JitReg;
+
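+/*
+ * A minimal sketch of the encoding, using the masks defined below:
+ *
+ *   JitReg r = jit_reg_new(JIT_REG_KIND_I32, 42);
+ *   bh_assert(jit_reg_kind(r) == JIT_REG_KIND_I32);
+ *   bh_assert(jit_reg_no(r) == 42);
+ */
+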
+/*
+ * Mask and shift bits of register kind.
+ */
+#define _JIT_REG_KIND_MASK 0xf0000000
+#define _JIT_REG_KIND_SHIFT 28
+
+/*
+ * Mask of register no. which must be the least significant bits.
+ */
+#define _JIT_REG_NO_MASK (~_JIT_REG_KIND_MASK)
+
+/*
+ * Constant value flag (the most significant bit) of register
+ * no. field of integer, floating point and vector registers. If this
+ * flag is set in the register no., the rest bits of register
+ * no. represent a signed (27-bit) integer constant value of the
+ * corresponding type of the register and the register is read-only.
+ */
+#define _JIT_REG_CONST_VAL_FLAG ((_JIT_REG_NO_MASK >> 1) + 1)
+
+/*
+ * Constant index flag of non-constant-value (constant value flag is
+ * not set in register no. field) integer, floating point and vector
+ * registers. If this flag is set, the rest bits of the register
+ * no. represent an index to the constant value table of the
+ * corresponding type of the register and the register is read-only.
+ */
+#define _JIT_REG_CONST_IDX_FLAG (_JIT_REG_CONST_VAL_FLAG >> 1)
+
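+/*
+ * The resulting layout of the 32-bit encoding:
+ *
+ *   [31..28] kind | [27] const value flag = 1 | [26..0] signed value
+ *   [31..28] kind | [27] 0 | [26] const index flag = 1 | [25..0] index
+ */
+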
+/**
+ * Register kinds. Don't change the order of the defined values. The
+ * L32 kind must be after all normal kinds (see _const_val and _reg_ann
+ * of JitCompContext).
+ */
+typedef enum JitRegKind {
+    JIT_REG_KIND_VOID = 0x00, /* void type */
+    JIT_REG_KIND_I32 = 0x01,  /* 32-bit signed or unsigned integer */
+    JIT_REG_KIND_I64 = 0x02,  /* 64-bit signed or unsigned integer */
+    JIT_REG_KIND_F32 = 0x03,  /* 32-bit floating point */
+    JIT_REG_KIND_F64 = 0x04,  /* 64-bit floating point */
+    JIT_REG_KIND_V64 = 0x05,  /* 64-bit vector */
+    JIT_REG_KIND_V128 = 0x06, /* 128-bit vector */
+    JIT_REG_KIND_V256 = 0x07, /* 256-bit vector */
+    JIT_REG_KIND_L32 = 0x08,  /* 32-bit label address */
+    JIT_REG_KIND_NUM          /* number of register kinds */
+} JitRegKind;
+
+#if UINTPTR_MAX == UINT64_MAX
+#define JIT_REG_KIND_PTR JIT_REG_KIND_I64
+#else
+#define JIT_REG_KIND_PTR JIT_REG_KIND_I32
+#endif
+
+/**
+ * Construct a new JIT IR register from the kind and no.
+ *
+ * @param reg_kind register kind
+ * @param reg_no register no.
+ *
+ * @return the new register with the given kind and no.
+ */
+static inline JitReg
+jit_reg_new(unsigned reg_kind, unsigned reg_no)
+{
+    return (JitReg)((reg_kind << _JIT_REG_KIND_SHIFT) | reg_no);
+}
+
+/**
+ * Get the register kind of the given register.
+ *
+ * @param r a JIT IR register
+ *
+ * @return the register kind of register r
+ */
+static inline int
+jit_reg_kind(JitReg r)
+{
+    return (r & _JIT_REG_KIND_MASK) >> _JIT_REG_KIND_SHIFT;
+}
+
+/**
+ * Get the register no. of the given JIT IR register.
+ *
+ * @param r a JIT IR register
+ *
+ * @return the register no. of register r
+ */
+static inline int
+jit_reg_no(JitReg r)
+{
+    return r & _JIT_REG_NO_MASK;
+}
+
+/**
+ * Check whether the given register is a normal value register.
+ *
+ * @param r a JIT IR register
+ *
+ * @return true iff the register is a normal value register
+ */
+static inline bool
+jit_reg_is_value(JitReg r)
+{
+    unsigned kind = jit_reg_kind(r);
+    return kind > JIT_REG_KIND_VOID && kind < JIT_REG_KIND_L32;
+}
+
+/**
+ * Check whether the given register is a constant value.
+ *
+ * @param r a JIT IR register
+ *
+ * @return true iff register r is a constant value
+ */
+static inline bool
+jit_reg_is_const_val(JitReg r)
+{
+    return jit_reg_is_value(r) && (r & _JIT_REG_CONST_VAL_FLAG);
+}
+
+/**
+ * Check whether the given register is a constant table index.
+ *
+ * @param r a JIT IR register
+ *
+ * @return true iff register r is a constant table index
+ */
+static inline bool
+jit_reg_is_const_idx(JitReg r)
+{
+    return (jit_reg_is_value(r) && !jit_reg_is_const_val(r)
+            && (r & _JIT_REG_CONST_IDX_FLAG));
+}
+
+/**
+ * Check whether the given register is a constant.
+ *
+ * @param r a JIT IR register
+ *
+ * @return true iff register r is a constant
+ */
+static inline bool
+jit_reg_is_const(JitReg r)
+{
+    return (jit_reg_is_value(r)
+            && (r & (_JIT_REG_CONST_VAL_FLAG | _JIT_REG_CONST_IDX_FLAG)));
+}
+
+/**
+ * Check whether the given register is a normal variable register.
+ *
+ * @param r a JIT IR register
+ *
+ * @return true iff the register is a normal variable register
+ */
+static inline bool
+jit_reg_is_variable(JitReg r)
+{
+    return (jit_reg_is_value(r)
+            && !(r & (_JIT_REG_CONST_VAL_FLAG | _JIT_REG_CONST_IDX_FLAG)));
+}
+
+/**
+ * Test whether the register is the given kind.
+ *
+ * @param KIND register kind name
+ * @param R register
+ *
+ * @return true if the register is the given kind
+ */
+#define jit_reg_is_kind(KIND, R) (jit_reg_kind(R) == JIT_REG_KIND_##KIND)
+
+/**
+ * Construct a zero IR register with the given kind.
+ *
+ * @param kind the kind of the value
+ *
+ * @return a constant register of zero
+ */
+static inline JitReg
+jit_reg_new_zero(unsigned kind)
+{
+    bh_assert(kind != JIT_REG_KIND_VOID && kind < JIT_REG_KIND_L32);
+    return jit_reg_new(kind, _JIT_REG_CONST_VAL_FLAG);
+}
+
+/**
+ * Test whether the register is a zero constant value.
+ *
+ * @param reg an IR register
+ *
+ * @return true iff the register is a constant zero
+ */
+static inline bool
+jit_reg_is_zero(JitReg reg)
+{
+    return (jit_reg_is_value(reg)
+            && jit_reg_no(reg) == _JIT_REG_CONST_VAL_FLAG);
+}
+
+/**
+ * Operand of instructions with fixed-number register operand(s).
+ */
+typedef JitReg JitOpndReg;
+
+/**
+ * Operand of instructions with variable-number register operand(s).
+ */
+typedef struct JitOpndVReg {
+    uint32 _reg_num;
+    JitReg _reg[1];
+} JitOpndVReg;
+
+/**
+ * Operand of lookupswitch instruction.
+ */
+typedef struct JitOpndLookupSwitch {
+    /* NOTE: distance between JitReg operands must be the same (see
+       jit_insn_opnd_regs). */
+    JitReg value;           /* the value to be compared */
+    uint32 match_pairs_num; /* match pairs number */
+    /* NOTE: offset between adjacent targets must be sizeof
+       (match_pairs[0]) (see implementation of jit_basic_block_succs),
+       so the default_target field must be here. */
+    JitReg default_target; /* default target BB */
+    struct {
+        int32 value;   /* match value of the match pair */
+        JitReg target; /* target BB of the match pair */
+    } match_pairs[1];  /* match pairs of the instruction */
+} JitOpndLookupSwitch;
+
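+/* This layout is what makes the stride trick in jit_insn_opnd_regs
+   and jit_basic_block_succs work: default_target and the per-pair
+   target fields sit sizeof(match_pairs[0]) bytes apart, so the
+   targets can be walked as a JitRegVec with that stride.  */
+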
+/**
+ * Instruction of JIT IR.
+ */
+typedef struct JitInsn {
+    /* Pointers to the previous and next instructions. */
+    struct JitInsn *prev;
+    struct JitInsn *next;
+
+    /* Opcode of the instruction. */
+    uint16 opcode;
+
+    /* Reserved field that may be used by optimizations locally. */
+    uint8 flags_u8;
+
+    /* The unique ID of the instruction. */
+    uint16 uid;
+
+    /* Operands for different kinds of instructions. */
+    union {
+        /* For instructions with fixed-number register operand(s). */
+        JitOpndReg _opnd_Reg[1];
+
+        /* For instructions with variable-number register operand(s). */
+        JitOpndVReg _opnd_VReg;
+
+        /* For lookupswitch instruction. */
+        JitOpndLookupSwitch _opnd_LookupSwitch;
+    } _opnd;
+} JitInsn;
+
+/**
+ * Opcodes of IR instructions.
+ */
+typedef enum JitOpcode {
+#define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE) JIT_OP_##NAME,
+#include "jit_ir.def"
+#undef INSN
+    JIT_OP_OPCODE_NUMBER
+} JitOpcode;
+
+/*
+ * Helper functions for creating new instructions.  Don't call them
+ * directly.  Use jit_insn_new_NAME, such as jit_insn_new_MOV instead.
+ */
+JitInsn *
+_jit_insn_new_Reg_1(JitOpcode opc, JitReg r0);
+JitInsn *
+_jit_insn_new_Reg_2(JitOpcode opc, JitReg r0, JitReg r1);
+JitInsn *
+_jit_insn_new_Reg_3(JitOpcode opc, JitReg r0, JitReg r1, JitReg r2);
+JitInsn *
+_jit_insn_new_Reg_4(JitOpcode opc, JitReg r0, JitReg r1, JitReg r2, JitReg r3);
+JitInsn *
+_jit_insn_new_Reg_5(JitOpcode opc, JitReg r0, JitReg r1, JitReg r2, JitReg r3,
+                    JitReg r4);
+JitInsn *
+_jit_insn_new_VReg_1(JitOpcode opc, JitReg r0, int n);
+JitInsn *
+_jit_insn_new_VReg_2(JitOpcode opc, JitReg r0, JitReg r1, int n);
+JitInsn *
+_jit_insn_new_LookupSwitch_1(JitOpcode opc, JitReg value, uint32 num);
+
+/*
+ * Instruction creation functions jit_insn_new_NAME, where NAME is the
+ * name of the instruction defined in jit_ir.def.
+ */
+#define ARG_DECL_Reg_1 JitReg r0
+#define ARG_LIST_Reg_1 r0
+#define ARG_DECL_Reg_2 JitReg r0, JitReg r1
+#define ARG_LIST_Reg_2 r0, r1
+#define ARG_DECL_Reg_3 JitReg r0, JitReg r1, JitReg r2
+#define ARG_LIST_Reg_3 r0, r1, r2
+#define ARG_DECL_Reg_4 JitReg r0, JitReg r1, JitReg r2, JitReg r3
+#define ARG_LIST_Reg_4 r0, r1, r2, r3
+#define ARG_DECL_Reg_5 JitReg r0, JitReg r1, JitReg r2, JitReg r3, JitReg r4
+#define ARG_LIST_Reg_5 r0, r1, r2, r3, r4
+#define ARG_DECL_VReg_1 JitReg r0, int n
+#define ARG_LIST_VReg_1 r0, n
+#define ARG_DECL_VReg_2 JitReg r0, JitReg r1, int n
+#define ARG_LIST_VReg_2 r0, r1, n
+#define ARG_DECL_LookupSwitch_1 JitReg value, uint32 num
+#define ARG_LIST_LookupSwitch_1 value, num
+#define INSN(NAME, OPND_KIND, OPND_NUM, FIRST_USE)             \
+    static inline JitInsn *jit_insn_new_##NAME(                \
+        ARG_DECL_##OPND_KIND##_##OPND_NUM)                     \
+    {                                                          \
+        return _jit_insn_new_##OPND_KIND##_##OPND_NUM(         \
+            JIT_OP_##NAME, ARG_LIST_##OPND_KIND##_##OPND_NUM); \
+    }
+#include "jit_ir.def"
+#undef INSN
+#undef ARG_DECL_Reg_1
+#undef ARG_LIST_Reg_1
+#undef ARG_DECL_Reg_2
+#undef ARG_LIST_Reg_2
+#undef ARG_DECL_Reg_3
+#undef ARG_LIST_Reg_3
+#undef ARG_DECL_Reg_4
+#undef ARG_LIST_Reg_4
+#undef ARG_DECL_Reg_5
+#undef ARG_LIST_Reg_5
+#undef ARG_DECL_VReg_1
+#undef ARG_LIST_VReg_1
+#undef ARG_DECL_VReg_2
+#undef ARG_LIST_VReg_2
+#undef ARG_DECL_LookupSwitch_1
+#undef ARG_LIST_LookupSwitch_1
+
+/**
+ * Delete an instruction
+ *
+ * @param insn an instruction to be deleted
+ */
+static inline void
+jit_insn_delete(JitInsn *insn)
+{
+    jit_free(insn);
+}
+
+/*
+ * Runtime type check functions that check whether accessing the n-th
+ * operand is legal.  They are only used in self-verification mode.
+ *
+ * @param insn any JIT IR instruction
+ * @param n index of the operand to access
+ *
+ * @return true if the access is legal
+ */
+bool
+_jit_insn_check_opnd_access_Reg(const JitInsn *insn, unsigned n);
+bool
+_jit_insn_check_opnd_access_VReg(const JitInsn *insn, unsigned n);
+bool
+_jit_insn_check_opnd_access_LookupSwitch(const JitInsn *insn);
+
+/**
+ * Get the pointer to the n-th register operand of the given
+ * instruction. The instruction format must be Reg.
+ *
+ * @param insn a Reg format instruction
+ * @param n index of the operand to get
+ *
+ * @return pointer to the n-th operand
+ */
+static inline JitReg *
+jit_insn_opnd(JitInsn *insn, int n)
+{
+    bh_assert(_jit_insn_check_opnd_access_Reg(insn, n));
+    return &insn->_opnd._opnd_Reg[n];
+}
+
+/**
+ * Get the pointer to the n-th register operand of the given
+ * instruction. The instruction format must be VReg.
+ *
+ * @param insn a VReg format instruction
+ * @param n index of the operand to get
+ *
+ * @return pointer to the n-th operand
+ */
+static inline JitReg *
+jit_insn_opndv(JitInsn *insn, int n)
+{
+    bh_assert(_jit_insn_check_opnd_access_VReg(insn, n));
+    return &insn->_opnd._opnd_VReg._reg[n];
+}
+
+/**
+ * Get the number of operands of the given instruction. The instruction
+ * format must be VReg.
+ *
+ * @param insn a VReg format instruction
+ *
+ * @return the number of operands of the instruction
+ */
+static inline unsigned
+jit_insn_opndv_num(const JitInsn *insn)
+{
+    bh_assert(_jit_insn_check_opnd_access_VReg(insn, 0));
+    return insn->_opnd._opnd_VReg._reg_num;
+}
+
+/**
+ * Get the pointer to the LookupSwitch operand of the given
+ * instruction. The instruction format must be LookupSwitch.
+ *
+ * @param insn a LookupSwitch format instruction
+ *
+ * @return pointer to the operand
+ */
+static inline JitOpndLookupSwitch *
+jit_insn_opndls(JitInsn *insn)
+{
+    bh_assert(_jit_insn_check_opnd_access_LookupSwitch(insn));
+    return &insn->_opnd._opnd_LookupSwitch;
+}
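+
+/*
+ * Illustrative sketch (kept out of compilation): rewriting the use
+ * operands of a Reg format instruction through jit_insn_opnd.  The
+ * 3-operand shape (one def followed by two uses) is an assumption for
+ * the example, not a statement about any particular opcode.
+ */
+#if 0
+static void
+replace_use(JitInsn *insn, JitReg old_reg, JitReg new_reg)
+{
+    /* Operand 0 is the defined register; operands 1..2 are uses.  */
+    unsigned i;
+    for (i = 1; i <= 2; i++) {
+        JitReg *opnd = jit_insn_opnd(insn, i);
+        if (*opnd == old_reg)
+            *opnd = new_reg;
+    }
+}
+#endif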
+
+/**
+ * Insert instruction @p insn2 before instruction @p insn1.
+ *
+ * @param insn1 any instruction
+ * @param insn2 any instruction
+ */
+void
+jit_insn_insert_before(JitInsn *insn1, JitInsn *insn2);
+
+/**
+ * Insert instruction @p insn2 after instruction @p insn1.
+ *
+ * @param insn1 any instruction
+ * @param insn2 any instruction
+ */
+void
+jit_insn_insert_after(JitInsn *insn1, JitInsn *insn2);
+
+/**
+ * Unlink the instruction @p insn from the containing list.
+ *
+ * @param insn an instruction
+ */
+void
+jit_insn_unlink(JitInsn *insn);
+
+/**
+ * Get the hash value of the comparable instruction (pure functions
+ * and exception check instructions).
+ *
+ * @param insn an instruction
+ *
+ * @return hash value of the instruction
+ */
+unsigned
+jit_insn_hash(JitInsn *insn);
+
+/**
+ * Compare whether the two comparable instructions are the same.
+ *
+ * @param insn1 the first instruction
+ * @param insn2 the second instruction
+ *
+ * @return true if the two instructions are the same
+ */
+bool
+jit_insn_equal(JitInsn *insn1, JitInsn *insn2);
+
+/**
+ * Register vector for accessing predecessors and successors of a
+ * basic block.
+ */
+typedef struct JitRegVec {
+    JitReg *_base; /* points to the first register */
+    int32 _stride; /* stride to the next register */
+    uint32 num;    /* number of registers */
+} JitRegVec;
+
+/**
+ * Get the address of the i-th register in the register vector.
+ *
+ * @param vec a register vector
+ * @param i index to the register vector
+ *
+ * @return the address of the i-th register in the vector
+ */
+static inline JitReg *
+jit_reg_vec_at(const JitRegVec *vec, unsigned i)
+{
+    bh_assert(i < vec->num);
+    return vec->_base + vec->_stride * i;
+}
+
+/**
+ * Visit each element in a register vector.
+ *
+ * @param V (JitRegVec) the register vector
+ * @param I (unsigned) index variable in the vector
+ * @param R (JitReg *) register pointer variable
+ */
+#define JIT_REG_VEC_FOREACH(V, I, R) \
+    for ((I) = 0, (R) = (V)._base; (I) < (V).num; (I)++, (R) += (V)._stride)
+
+/**
+ * Visit each register defined by an instruction.
+ *
+ * @param V (JitRegVec) register vector of the instruction
+ * @param I (unsigned) index variable in the vector
+ * @param R (JitReg *) register pointer variable
+ * @param F index of the first used register
+ */
+#define JIT_REG_VEC_FOREACH_DEF(V, I, R, F) \
+    for ((I) = 0, (R) = (V)._base; (I) < (F); (I)++, (R) += (V)._stride)
+
+/**
+ * Visit each register used by an instruction.
+ *
+ * @param V (JitRegVec) register vector of the instruction
+ * @param I (unsigned) index variable in the vector
+ * @param R (JitReg *) register pointer variable
+ * @param F index of the first used register
+ */
+#define JIT_REG_VEC_FOREACH_USE(V, I, R, F)                             \
+    for ((I) = (F), (R) = (V)._base + (F) * (V)._stride; (I) < (V).num; \
+         (I)++, (R) += (V)._stride)
+
+/**
+ * Get a generic register vector that contains all register operands.
+ * The registers defined by the instruction, if any, appear before the
+ * registers used by the instruction.
+ *
+ * @param insn an instruction
+ *
+ * @return a register vector containing register operands
+ */
+JitRegVec
+jit_insn_opnd_regs(JitInsn *insn);
+
+/**
+ * Get the index of the first use register in the register vector
+ * returned by jit_insn_opnd_regs.
+ *
+ * @param insn an instruction
+ *
+ * @return the index of the first use register in the register vector
+ */
+unsigned
+jit_insn_opnd_first_use(JitInsn *insn);
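+
+/*
+ * Illustrative sketch (kept out of compilation): walking the defined
+ * and used registers of an arbitrary instruction with the vector and
+ * macros above.
+ */
+#if 0
+static void
+visit_defs_and_uses(JitInsn *insn)
+{
+    JitRegVec regvec = jit_insn_opnd_regs(insn);
+    unsigned first_use = jit_insn_opnd_first_use(insn);
+    unsigned i;
+    JitReg *regp;
+
+    JIT_REG_VEC_FOREACH_DEF(regvec, i, regp, first_use)
+    {
+        /* *regp is defined by insn */
+    }
+
+    JIT_REG_VEC_FOREACH_USE(regvec, i, regp, first_use)
+    {
+        /* *regp is used by insn */
+    }
+}
+#endif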
+
+/**
+ * Basic Block of JIT IR. It is a true basic block only when the IR is
+ * in BB form. The block is represented by a special phi node, whose
+ * result and arguments are label registers. The result label is the
+ * containing block's label. The arguments are labels of predecessors
+ * of the block. Successor labels are stored in the last instruction,
+ * which must be a control flow instruction. Instructions of a block
+ * are linked in a circular linked list with the block phi node as the
+ * end of the list. The next and prev field of the block phi node
+ * point to the first and last instructions of the block.
+ */
+typedef JitInsn JitBasicBlock;
+
+/**
+ * Create a new basic block instance.
+ *
+ * @param label the label of the new basic block
+ * @param n number of predecessors
+ *
+ * @return the created new basic block instance
+ */
+JitBasicBlock *
+jit_basic_block_new(JitReg label, int n);
+
+/**
+ * Delete a basic block instance and all instructions in it.
+ *
+ * @param block the basic block to be deleted
+ */
+void
+jit_basic_block_delete(JitBasicBlock *block);
+
+/**
+ * Get the label of the basic block.
+ *
+ * @param block a basic block instance
+ *
+ * @return the label of the basic block
+ */
+static inline JitReg
+jit_basic_block_label(JitBasicBlock *block)
+{
+    return *(jit_insn_opndv(block, 0));
+}
+
+/**
+ * Get the first instruction of the basic block.
+ *
+ * @param block a basic block instance
+ *
+ * @return the first instruction of the basic block
+ */
+static inline JitInsn *
+jit_basic_block_first_insn(JitBasicBlock *block)
+{
+    return block->next;
+}
+
+/**
+ * Get the last instruction of the basic block.
+ *
+ * @param block a basic block instance
+ *
+ * @return the last instruction of the basic block
+ */
+static inline JitInsn *
+jit_basic_block_last_insn(JitBasicBlock *block)
+{
+    return block->prev;
+}
+
+/**
+ * Get the end of instruction list of the basic block (which is always
+ * the block itself).
+ *
+ * @param block a basic block instance
+ *
+ * @return the end of instruction list of the basic block
+ */
+static inline JitInsn *
+jit_basic_block_end_insn(JitBasicBlock *block)
+{
+    return block;
+}
+
+/**
+ * Visit each instruction in the block from the first to the last. In
+ * the code block, the instruction pointer @p I must be a valid
+ * pointer to an instruction in the block. That means if the
+ * instruction may be deleted, @p I must point to the previous or next
+ * valid instruction before the next iteration.
+ *
+ * @param B (JitBasicBlock *) the block
+ * @param I (JitInsn *) instruction visited
+ */
+#define JIT_FOREACH_INSN(B, I)                                                \
+    for (I = jit_basic_block_first_insn(B); I != jit_basic_block_end_insn(B); \
+         I = I->next)
+
+/**
+ * Visit each instruction in the block from the last to the first. In
+ * the code block, the instruction pointer @p I must be a valid
+ * pointer to an instruction in the block. That means if the
+ * instruction may be deleted, @p I must point to the previous or next
+ * valid instruction before the next iteration.
+ *
+ * @param B (JitBasicBlock *) the block
+ * @param I (JitInsn *) instruction visited
+ */
+#define JIT_FOREACH_INSN_REVERSE(B, I)                                       \
+    for (I = jit_basic_block_last_insn(B); I != jit_basic_block_end_insn(B); \
+         I = I->prev)
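+
+/*
+ * Illustrative sketch (kept out of compilation): deleting instructions
+ * while traversing a block.  The next pointer is saved before the
+ * instruction is unlinked so the iterator stays valid, as the comments
+ * above require.
+ */
+#if 0
+static void
+delete_insns_with_opcode(JitBasicBlock *block, uint16 opcode)
+{
+    JitInsn *insn = jit_basic_block_first_insn(block);
+
+    while (insn != jit_basic_block_end_insn(block)) {
+        JitInsn *next = insn->next;
+
+        if (insn->opcode == opcode) {
+            jit_insn_unlink(insn);
+            jit_insn_delete(insn);
+        }
+        insn = next;
+    }
+}
+#endif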
+
+/**
+ * Prepend an instruction in the front of the block. The position is
+ * just after the block phi node (the block instance itself).
+ *
+ * @param block a block
+ * @param insn an instruction to be prepended
+ */
+static inline void
+jit_basic_block_prepend_insn(JitBasicBlock *block, JitInsn *insn)
+{
+    jit_insn_insert_after(block, insn);
+}
+
+/**
+ * Append an instruction to the end of the basic block.
+ *
+ * @param block a basic block
+ * @param insn an instruction to be appended
+ */
+static inline void
+jit_basic_block_append_insn(JitBasicBlock *block, JitInsn *insn)
+{
+    jit_insn_insert_before(block, insn);
+}
+
+/**
+ * Get the register vector of predecessors of a basic block.
+ *
+ * @param block a JIT IR block
+ *
+ * @return register vector of the predecessors
+ */
+JitRegVec
+jit_basic_block_preds(JitBasicBlock *block);
+
+/**
+ * Get the register vector of successors of a basic block.
+ *
+ * @param block a JIT IR basic block
+ *
+ * @return register vector of the successors
+ */
+JitRegVec
+jit_basic_block_succs(JitBasicBlock *block);
+
+/**
+ * Hard register information of one kind.
+ */
+typedef struct JitHardRegInfo {
+    struct {
+        /* Hard register number of this kind. */
+        uint32 num;
+
+        /* Whether each register is fixed. */
+        const uint8 *fixed;
+
+        /* Whether each register is caller-saved in the native ABI. */
+        const uint8 *caller_saved_native;
+
+        /* Whether each register is caller-saved in the JITed ABI. */
+        const uint8 *caller_saved_jitted;
+    } info[JIT_REG_KIND_L32];
+
+    /* The indexes of hard registers of frame pointer, exec_env and cmp. */
+    uint32 fp_hreg_index;
+    uint32 exec_env_hreg_index;
+    uint32 cmp_hreg_index;
+} JitHardRegInfo;
+
+struct JitBlock;
+struct JitCompContext;
+struct JitValueSlot;
+
+/**
+ * Value in the WASM operation stack; each stack element
+ * is a JIT register
+ */
+typedef struct JitValue {
+    struct JitValue *next;
+    struct JitValue *prev;
+    struct JitValueSlot *value;
+    /* VALUE_TYPE_I32/I64/F32/F64/VOID */
+    uint8 type;
+} JitValue;
+
+/**
+ * Value stack, represents stack elements in a WASM block
+ */
+typedef struct JitValueStack {
+    JitValue *value_list_head;
+    JitValue *value_list_end;
+} JitValueStack;
+
+/* Record information of a value slot of local variable or stack
+   during translation.  */
+typedef struct JitValueSlot {
+    /* The virtual register that holds the value of the slot if the
+       value of the slot is in register.  */
+    JitReg reg;
+
+    /* The dirty bit of the value slot. It's set if the value in
+       register is newer than the value in memory.  */
+    uint32 dirty : 1;
+
+    /* Whether the new value in register is a reference, which is valid
+       only when the dirty bit is set.  */
+    uint32 ref : 1;
+
+    /* Committed reference flag.  0: unknown, 1: not-reference, 2:
+       reference.  */
+    uint32 committed_ref : 2;
+} JitValueSlot;
+
+typedef struct JitMemRegs {
+    JitReg memory_inst;
+    /* The following registers should be re-loaded after
+       memory.grow, callbc and callnative */
+    JitReg memory_data;
+    JitReg memory_data_end;
+    JitReg mem_bound_check_1byte;
+    JitReg mem_bound_check_2bytes;
+    JitReg mem_bound_check_4bytes;
+    JitReg mem_bound_check_8bytes;
+    JitReg mem_bound_check_16bytes;
+} JitMemRegs;
+
+typedef struct JitTableRegs {
+    JitReg table_inst;
+    JitReg table_data;
+    /* Should be re-loaded after table.grow,
+       callbc and callnative */
+    JitReg table_cur_size;
+} JitTableRegs;
+
+/* Frame information for translation */
+typedef struct JitFrame {
+    /* The current wasm module */
+    WASMModule *cur_wasm_module;
+    /* The current wasm function */
+    WASMFunction *cur_wasm_func;
+    /* The current wasm function index */
+    uint32 cur_wasm_func_idx;
+    /* The current compilation context */
+    struct JitCompContext *cc;
+
+    /* Max local slot number.  */
+    uint32 max_locals;
+
+    /* Max operand stack slot number.  */
+    uint32 max_stacks;
+
+    /* Instruction pointer */
+    uint8 *ip;
+
+    /* Stack top pointer */
+    JitValueSlot *sp;
+
+    /* Committed instruction pointer */
+    uint8 *committed_ip;
+
+    /* Committed stack top pointer */
+    JitValueSlot *committed_sp;
+
+    /* WASM module instance */
+    JitReg module_inst_reg;
+    /* WASM module */
+    JitReg module_reg;
+    /* module_inst->fast_jit_func_ptrs */
+    JitReg fast_jit_func_ptrs_reg;
+    /* Base address of global data */
+    JitReg global_data_reg;
+    /* Boundary of auxiliary stack */
+    JitReg aux_stack_bound_reg;
+    /* Bottom of auxiliary stack */
+    JitReg aux_stack_bottom_reg;
+    /* Memory instances */
+    JitReg memories_reg;
+    /* Data of memory instances */
+    JitMemRegs *memory_regs;
+    /* Table instances */
+    JitReg tables_reg;
+    /* Data of table instances */
+    JitTableRegs *table_regs;
+
+    /* Local variables */
+    JitValueSlot lp[1];
+} JitFrame;
+
+typedef struct JitIncomingInsn {
+    struct JitIncomingInsn *next;
+    JitInsn *insn;
+    uint32 opnd_idx;
+} JitIncomingInsn, *JitIncomingInsnList;
+
+typedef struct JitBlock {
+    struct JitBlock *next;
+    struct JitBlock *prev;
+
+    /* The current Jit Block */
+    struct JitCompContext *cc;
+
+    /* LABEL_TYPE_BLOCK/LOOP/IF/FUNCTION */
+    uint32 label_type;
+
+    /* code of else opcode of this block, if it is an IF block  */
+    uint8 *wasm_code_else;
+    /* code of end opcode of this block */
+    uint8 *wasm_code_end;
+
+    /* JIT label points to code begin */
+    JitBasicBlock *basic_block_entry;
+    /* JIT label points to code else */
+    JitBasicBlock *basic_block_else;
+    /* JIT label points to code end */
+    JitBasicBlock *basic_block_end;
+
+    /* Incoming INSN for basic_block_else */
+    JitInsn *incoming_insn_for_else_bb;
+    /* Incoming INSNs for basic_block_end */
+    JitIncomingInsnList incoming_insns_for_end_bb;
+
+    /* WASM operation stack */
+    JitValueStack value_stack;
+
+    /* Param count/types/PHIs of this block */
+    uint32 param_count;
+    uint8 *param_types;
+
+    /* Result count/types/PHIs of this block */
+    uint32 result_count;
+    uint8 *result_types;
+
+    /* The begin frame stack pointer of this block */
+    JitValueSlot *frame_sp_begin;
+} JitBlock;
+
+/**
+ * Block stack, represents WASM block stack elements
+ */
+typedef struct JitBlockStack {
+    JitBlock *block_list_head;
+    JitBlock *block_list_end;
+} JitBlockStack;
+
+/**
+ * The JIT compilation context for one compilation process of a
+ * compilation unit.
+ */
+typedef struct JitCompContext {
+    /* Hard register information of each kind. */
+    const JitHardRegInfo *hreg_info;
+
+    /* No. of the pass to be applied. */
+    uint8 cur_pass_no;
+
+    /* The current wasm module */
+    WASMModule *cur_wasm_module;
+    /* The current wasm function */
+    WASMFunction *cur_wasm_func;
+    /* The current wasm function index */
+    uint32 cur_wasm_func_idx;
+    /* The block stack */
+    JitBlockStack block_stack;
+
+    bool mem_space_unchanged;
+
+    /* Entry and exit labels of the compilation unit, whose numbers must
+       be 0 and 1 respectively (see JIT_FOREACH_BLOCK). */
+    JitReg entry_label;
+    JitReg exit_label;
+    JitBasicBlock **exce_basic_blocks;
+    JitIncomingInsnList *incoming_insns_for_exec_bbs;
+
+    /* The current basic block to generate instructions */
+    JitBasicBlock *cur_basic_block;
+
+    /* Registers of frame pointer, exec_env and CMP result. */
+    JitReg fp_reg;
+    JitReg exec_env_reg;
+    JitReg cmp_reg;
+
+    /* WASM module instance */
+    JitReg module_inst_reg;
+    /* WASM module */
+    JitReg module_reg;
+    /* module_inst->fast_jit_func_ptrs */
+    JitReg fast_jit_func_ptrs_reg;
+    /* Base address of global data */
+    JitReg global_data_reg;
+    /* Boundary of auxiliary stack */
+    JitReg aux_stack_bound_reg;
+    /* Bottom of auxiliary stack */
+    JitReg aux_stack_bottom_reg;
+    /* Memory instances */
+    JitReg memories_reg;
+    /* Data of memory instances */
+    JitMemRegs *memory_regs;
+    /* Table instances */
+    JitReg tables_reg;
+    /* Data of table instances */
+    JitTableRegs *table_regs;
+
+    /* Current frame information for translation */
+    JitFrame *jit_frame;
+
+    /* The total frame size of current function */
+    uint32 total_frame_size;
+
+    /* The spill cache offset to the interp frame */
+    uint32 spill_cache_offset;
+    /* The spill cache size */
+    uint32 spill_cache_size;
+
+    /* The offset of jitted_return_address in the frame, which is set by
+       the pass frontend and used by the pass codegen. */
+    uint32 jitted_return_address_offset;
+
+    /* Begin and end addresses of the jitted code produced by the pass
+       codegen and consumed by the region registration after codegen and
+       the pass dump. */
+    void *jitted_addr_begin;
+    void *jitted_addr_end;
+
+    char last_error[128];
+
+    /* Below fields are all private.  Don't access them directly. */
+
+    /* Reference count of the compilation context. */
+    uint16 _reference_count;
+
+    /* Constant values. */
+    struct {
+        /* Number of constant values of each kind. */
+        uint32 _num[JIT_REG_KIND_L32];
+
+        /* Capacity of constant values of each kind. */
+        uint32 _capacity[JIT_REG_KIND_L32];
+
+        /* Constant values of each kind. */
+        uint8 *_value[JIT_REG_KIND_L32];
+
+        /* Next element on the list of values with the same hash code. */
+        JitReg *_next[JIT_REG_KIND_L32];
+
+        /* Size of the hash table. */
+        uint32 _hash_table_size;
+
+        /* Map values to JIT register. */
+        JitReg *_hash_table;
+    } _const_val;
+
+    /* Annotations of labels, registers and instructions. */
+    struct {
+        /* Number of all ever created labels. */
+        uint32 _label_num;
+
+        /* Capacity of label annotations. */
+        uint32 _label_capacity;
+
+        /* Number of all ever created instructions. */
+        uint32 _insn_num;
+
+        /* Capacity of instruction annotations. */
+        uint32 _insn_capacity;
+
+        /* Number of ever created registers of each kind. */
+        uint32 _reg_num[JIT_REG_KIND_L32];
+
+        /* Capacity of register annotations of each kind. */
+        uint32 _reg_capacity[JIT_REG_KIND_L32];
+
+        /* Storage of annotations. */
+#define ANN_LABEL(TYPE, NAME) TYPE *_label_##NAME;
+#define ANN_INSN(TYPE, NAME) TYPE *_insn_##NAME;
+#define ANN_REG(TYPE, NAME) TYPE *_reg_##NAME[JIT_REG_KIND_L32];
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef ANN_INSN
+#undef ANN_REG
+
+        /* Flags of annotations. */
+#define ANN_LABEL(TYPE, NAME) uint32 _label_##NAME##_enabled : 1;
+#define ANN_INSN(TYPE, NAME) uint32 _insn_##NAME##_enabled : 1;
+#define ANN_REG(TYPE, NAME) uint32 _reg_##NAME##_enabled : 1;
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef ANN_INSN
+#undef ANN_REG
+    } _ann;
+
+    /* Instruction hash table. */
+    struct {
+        /* Size of the hash table. */
+        uint32 _size;
+
+        /* The hash table. */
+        JitInsn **_table;
+    } _insn_hash_table;
+
+    /* Indicates whether the last comparison was on floating-point
+       numbers */
+    bool last_cmp_on_fp;
+} JitCompContext;
+
+/*
+ * Annotation accessing functions jit_annl_NAME, jit_anni_NAME and
+ * jit_annr_NAME.
+ */
+#define ANN_LABEL(TYPE, NAME)                                             \
+    static inline TYPE *jit_annl_##NAME(JitCompContext *cc, JitReg label) \
+    {                                                                     \
+        unsigned idx = jit_reg_no(label);                                 \
+        bh_assert(jit_reg_kind(label) == JIT_REG_KIND_L32);               \
+        bh_assert(idx < cc->_ann._label_num);                             \
+        bh_assert(cc->_ann._label_##NAME##_enabled);                      \
+        return &cc->_ann._label_##NAME[idx];                              \
+    }
+#define ANN_INSN(TYPE, NAME)                                               \
+    static inline TYPE *jit_anni_##NAME(JitCompContext *cc, JitInsn *insn) \
+    {                                                                      \
+        unsigned uid = insn->uid;                                          \
+        bh_assert(uid < cc->_ann._insn_num);                               \
+        bh_assert(cc->_ann._insn_##NAME##_enabled);                        \
+        return &cc->_ann._insn_##NAME[uid];                                \
+    }
+#define ANN_REG(TYPE, NAME)                                             \
+    static inline TYPE *jit_annr_##NAME(JitCompContext *cc, JitReg reg) \
+    {                                                                   \
+        unsigned kind = jit_reg_kind(reg);                              \
+        unsigned no = jit_reg_no(reg);                                  \
+        bh_assert(kind < JIT_REG_KIND_L32);                             \
+        bh_assert(no < cc->_ann._reg_num[kind]);                        \
+        bh_assert(cc->_ann._reg_##NAME##_enabled);                      \
+        return &cc->_ann._reg_##NAME[kind][no];                         \
+    }
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef ANN_INSN
+#undef ANN_REG
+
+/*
+ * Annotation enabling functions jit_annl_enable_NAME,
+ * jit_anni_enable_NAME and jit_annr_enable_NAME, which allocate
+ * sufficient memory for the annotations.
+ */
+#define ANN_LABEL(TYPE, NAME) bool jit_annl_enable_##NAME(JitCompContext *cc);
+#define ANN_INSN(TYPE, NAME) bool jit_anni_enable_##NAME(JitCompContext *cc);
+#define ANN_REG(TYPE, NAME) bool jit_annr_enable_##NAME(JitCompContext *cc);
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef ANN_INSN
+#undef ANN_REG
+
+/*
+ * Annotation disabling functions jit_annl_disable_NAME,
+ * jit_anni_disable_NAME and jit_annr_disable_NAME, which release
+ * memory of the annotations.  Before calling these functions,
+ * resources owned by the annotations must be explicitly released.
+ */
+#define ANN_LABEL(TYPE, NAME) void jit_annl_disable_##NAME(JitCompContext *cc);
+#define ANN_INSN(TYPE, NAME) void jit_anni_disable_##NAME(JitCompContext *cc);
+#define ANN_REG(TYPE, NAME) void jit_annr_disable_##NAME(JitCompContext *cc);
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef ANN_INSN
+#undef ANN_REG
+
+/*
+ * Functions jit_annl_is_enabled_NAME, jit_anni_is_enabled_NAME and
+ * jit_annr_is_enabled_NAME for checking whether an annotation is
+ * enabled.
+ */
+#define ANN_LABEL(TYPE, NAME)                                         \
+    static inline bool jit_annl_is_enabled_##NAME(JitCompContext *cc) \
+    {                                                                 \
+        return !!cc->_ann._label_##NAME##_enabled;                    \
+    }
+#define ANN_INSN(TYPE, NAME)                                          \
+    static inline bool jit_anni_is_enabled_##NAME(JitCompContext *cc) \
+    {                                                                 \
+        return !!cc->_ann._insn_##NAME##_enabled;                     \
+    }
+#define ANN_REG(TYPE, NAME)                                           \
+    static inline bool jit_annr_is_enabled_##NAME(JitCompContext *cc) \
+    {                                                                 \
+        return !!cc->_ann._reg_##NAME##_enabled;                      \
+    }
+#include "jit_ir.def"
+#undef ANN_LABEL
+#undef ANN_INSN
+#undef ANN_REG
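+
+/*
+ * Illustrative sketch (kept out of compilation): the typical annotation
+ * lifecycle of a pass.  The basic_block label annotation used here is
+ * the one consulted by JIT_FOREACH_BLOCK below.
+ */
+#if 0
+static bool
+example_pass(JitCompContext *cc)
+{
+    if (!jit_annl_is_enabled_basic_block(cc)
+        && !jit_annl_enable_basic_block(cc))
+        return false;
+
+    /* ... read/write *(jit_annl_basic_block(cc, label)) here ... */
+
+    jit_annl_disable_basic_block(cc);
+    return true;
+}
+#endif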
+
+/**
+ * Initialize a compilation context.
+ *
+ * @param cc the compilation context
+ * @param htab_size the initial hash table size of the constant pool
+ *
+ * @return cc if succeeds, NULL otherwise
+ */
+JitCompContext *
+jit_cc_init(JitCompContext *cc, unsigned htab_size);
+
+/**
+ * Release all resources of a compilation context, which doesn't
+ * include the compilation context itself.
+ *
+ * @param cc the compilation context
+ */
+void
+jit_cc_destroy(JitCompContext *cc);
+
+/**
+ * Increase the reference count of the compilation context.
+ *
+ * @param cc the compilation context
+ */
+static inline void
+jit_cc_inc_ref(JitCompContext *cc)
+{
+    cc->_reference_count++;
+}
+
+/**
+ * Decrease the reference count, and destroy and free the compilation
+ * context when the count drops to zero.
+ *
+ * @param cc the compilation context
+ */
+void
+jit_cc_delete(JitCompContext *cc);
+
+char *
+jit_get_last_error(JitCompContext *cc);
+
+void
+jit_set_last_error(JitCompContext *cc, const char *error);
+
+void
+jit_set_last_error_v(JitCompContext *cc, const char *format, ...);
+
+/**
+ * Create an I32 constant value with relocation info in the compilation
+ * context. A constant value that has relocation info cannot be
+ * constant-folded like normal constants because its value depends on
+ * runtime context and may be different in different executions.
+ *
+ * @param cc compilation context
+ * @param val an I32 value
+ * @param rel relocation information
+ *
+ * @return a constant register containing the value
+ */
+JitReg
+jit_cc_new_const_I32_rel(JitCompContext *cc, int32 val, uint32 rel);
+
+/**
+ * Create an I32 constant value without relocation info (rel = 0) in
+ * the compilation context.
+ *
+ * @param cc compilation context
+ * @param val an I32 value
+ *
+ * @return a constant register containing the value
+ */
+static inline JitReg
+jit_cc_new_const_I32(JitCompContext *cc, int32 val)
+{
+    return jit_cc_new_const_I32_rel(cc, val, 0);
+}
+
+/**
+ * Create an I64 constant value in the compilation context.
+ *
+ * @param cc compilation context
+ * @param val an I64 value
+ *
+ * @return a constant register containing the value
+ */
+JitReg
+jit_cc_new_const_I64(JitCompContext *cc, int64 val);
+
+#if UINTPTR_MAX == UINT64_MAX
+#define jit_cc_new_const_PTR jit_cc_new_const_I64
+#else
+#define jit_cc_new_const_PTR jit_cc_new_const_I32
+#endif
+
+/**
+ * Create an F32 constant value in the compilation context.
+ *
+ * @param cc compilation context
+ * @param val an F32 value
+ *
+ * @return a constant register containing the value
+ */
+JitReg
+jit_cc_new_const_F32(JitCompContext *cc, float val);
+
+/**
+ * Create an F64 constant value in the compilation context.
+ *
+ * @param cc compilation context
+ * @param val an F64 value
+ *
+ * @return a constant register containing the value
+ */
+JitReg
+jit_cc_new_const_F64(JitCompContext *cc, double val);
+
+/**
+ * Get the relocation info of an I32 constant register.
+ *
+ * @param cc compilation context
+ * @param reg constant register
+ *
+ * @return the relocation info of the constant
+ */
+uint32
+jit_cc_get_const_I32_rel(JitCompContext *cc, JitReg reg);
+
+/**
+ * Get the constant value of an I32 constant register.
+ *
+ * @param cc compilation context
+ * @param reg constant register
+ *
+ * @return the constant value
+ */
+int32
+jit_cc_get_const_I32(JitCompContext *cc, JitReg reg);
+
+/**
+ * Get the constant value of an I64 constant register.
+ *
+ * @param cc compilation context
+ * @param reg constant register
+ *
+ * @return the constant value
+ */
+int64
+jit_cc_get_const_I64(JitCompContext *cc, JitReg reg);
+
+/**
+ * Get the constant value of an F32 constant register.
+ *
+ * @param cc compilation context
+ * @param reg constant register
+ *
+ * @return the constant value
+ */
+float
+jit_cc_get_const_F32(JitCompContext *cc, JitReg reg);
+
+/**
+ * Get the constant value of an F64 constant register.
+ *
+ * @param cc compilation context
+ * @param reg constant register
+ *
+ * @return the constant value
+ */
+double
+jit_cc_get_const_F64(JitCompContext *cc, JitReg reg);
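+
+/*
+ * Illustrative sketch (kept out of compilation): the constant pool is
+ * backed by a hash table, so creating the same value twice is expected
+ * to return the same constant register, and the value can be read back
+ * through the getters above.
+ */
+#if 0
+static void
+const_round_trip(JitCompContext *cc)
+{
+    JitReg c1 = jit_cc_new_const_I32(cc, 42);
+    JitReg c2 = jit_cc_new_const_I32(cc, 42);
+
+    bh_assert(c1 && c1 == c2);
+    bh_assert(jit_cc_get_const_I32(cc, c1) == 42);
+}
+#endif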
+
+/**
+ * Get the number of total created labels.
+ *
+ * @param cc the compilation context
+ *
+ * @return the number of total created labels
+ */
+static inline unsigned
+jit_cc_label_num(JitCompContext *cc)
+{
+    return cc->_ann._label_num;
+}
+
+/**
+ * Get the number of total created instructions.
+ *
+ * @param cc the compilation context
+ *
+ * @return the number of total created instructions
+ */
+static inline unsigned
+jit_cc_insn_num(JitCompContext *cc)
+{
+    return cc->_ann._insn_num;
+}
+
+/**
+ * Get the number of total created registers.
+ *
+ * @param cc the compilation context
+ * @param kind the register kind
+ *
+ * @return the number of total created registers
+ */
+static inline unsigned
+jit_cc_reg_num(JitCompContext *cc, unsigned kind)
+{
+    bh_assert(kind < JIT_REG_KIND_L32);
+    return cc->_ann._reg_num[kind];
+}
+
+/**
+ * Create a new label in the compilation context.
+ *
+ * @param cc the compilation context
+ *
+ * @return a new label in the compilation context
+ */
+JitReg
+jit_cc_new_label(JitCompContext *cc);
+
+/**
+ * Create a new block with a new label in the compilation context.
+ *
+ * @param cc the compilation context
+ * @param n number of predecessors
+ *
+ * @return a new block with a new label in the compilation context
+ */
+JitBasicBlock *
+jit_cc_new_basic_block(JitCompContext *cc, int n);
+
+/**
+ * Resize the predecessor number of a block.
+ *
+ * @param cc the containing compilation context
+ * @param block block to be resized
+ * @param n new number of predecessors
+ *
+ * @return the new block if succeeds, NULL otherwise
+ */
+JitBasicBlock *
+jit_cc_resize_basic_block(JitCompContext *cc, JitBasicBlock *block, int n);
+
+/**
+ * Initialize the instruction hash table to the given size and enable
+ * the instruction's _hash_link annotation.
+ *
+ * @param cc the containing compilation context
+ * @param n size of the hash table
+ *
+ * @return true if succeeds, false otherwise
+ */
+bool
+jit_cc_enable_insn_hash(JitCompContext *cc, unsigned n);
+
+/**
+ * Destroy the instruction hash table and disable the instruction's
+ * _hash_link annotation.
+ *
+ * @param cc the containing compilation context
+ */
+void
+jit_cc_disable_insn_hash(JitCompContext *cc);
+
+/**
+ * Reset the hash table entries.
+ *
+ * @param cc the containing compilation context
+ */
+void
+jit_cc_reset_insn_hash(JitCompContext *cc);
+
+/**
+ * Allocate a new instruction ID in the compilation context and set it
+ * to the given instruction.
+ *
+ * @param cc the compilation context
+ * @param insn IR instruction
+ *
+ * @return the insn with uid being set
+ */
+JitInsn *
+jit_cc_set_insn_uid(JitCompContext *cc, JitInsn *insn);
+
+/*
+ * Similar to jit_cc_set_insn_uid except that if setting the uid fails,
+ * the insn is deleted.  Only used by jit_cc_new_insn.
+ */
+JitInsn *
+_jit_cc_set_insn_uid_for_new_insn(JitCompContext *cc, JitInsn *insn);
+
+/**
+ * Create a new instruction in the compilation context.
+ *
+ * @param cc the compilation context
+ * @param NAME instruction name
+ *
+ * @return a new instruction in the compilation context
+ */
+#define jit_cc_new_insn(cc, NAME, ...) \
+    _jit_cc_set_insn_uid_for_new_insn(cc, jit_insn_new_##NAME(__VA_ARGS__))
+
+/*
+ * Helper function for jit_cc_new_insn_norm.
+ */
+JitInsn *
+_jit_cc_new_insn_norm(JitCompContext *cc, JitReg *result, JitInsn *insn);
+
+/**
+ * Create a new instruction in the compilation context and normalize
+ * the instruction (constant folding and simplification etc.). If
+ * instruction hashing is enabled (the _hash_link annotation is
+ * enabled), try to find an existing equivalent instruction before
+ * adding a new one to the compilation context.
+ *
+ * @param cc the compilation context
+ * @param result returned result of the instruction. If the value is
+ * non-zero, it is the result of the constant-folding or an existing
+ * equivalent instruction, in which case no instruction is added into
+ * the compilation context. Otherwise, a new normalized instruction
+ * has been added into the compilation context.
+ * @param NAME instruction name
+ *
+ * @return a new or existing instruction in the compilation context
+ */
+#define jit_cc_new_insn_norm(cc, result, NAME, ...) \
+    _jit_cc_new_insn_norm(cc, result, jit_insn_new_##NAME(__VA_ARGS__))
+
+/**
+ * Helper function for GEN_INSN
+ *
+ * @param cc compilation context
+ * @param block the current block
+ * @param insn the new instruction
+ *
+ * @return the new instruction if inserted, NULL otherwise
+ */
+static inline JitInsn *
+_gen_insn(JitCompContext *cc, JitInsn *insn)
+{
+    if (insn)
+        jit_basic_block_append_insn(cc->cur_basic_block, insn);
+    else
+        jit_set_last_error(cc, "generate insn failed");
+
+    return insn;
+}
+
+/**
+ * Generate and append an instruction to the current block.
+ */
+#define GEN_INSN(...) _gen_insn(cc, jit_cc_new_insn(cc, __VA_ARGS__))
+
+/**
+ * Create a constant register without relocation info.
+ *
+ * @param Type type of the register
+ * @param val the constant value
+ *
+ * @return the constant register if succeeds, 0 otherwise
+ */
+#define NEW_CONST(Type, val) jit_cc_new_const_##Type(cc, val)
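+
+/*
+ * Illustrative sketch (kept out of compilation): emitting an i32
+ * addition into the current block.  The ADD opcode name and its Reg-3
+ * format are assumptions based on typical entries in jit_ir.def.
+ */
+#if 0
+static JitReg
+emit_inc_i32(JitCompContext *cc, JitReg value)
+{
+    JitReg result = jit_cc_new_reg_I32(cc);
+
+    /* Appends "ADD result, value, 1" to cc->cur_basic_block; on failure
+       the last error is set and NULL is returned.  */
+    if (!GEN_INSN(ADD, result, value, NEW_CONST(I32, 1)))
+        return 0;
+
+    return result;
+}
+#endif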
+
+/**
+ * Create a new virtual register in the compilation context.
+ *
+ * @param cc the compilation context
+ * @param kind kind of the register
+ *
+ * @return a new virtual register in the compilation context
+ */
+JitReg
+jit_cc_new_reg(JitCompContext *cc, unsigned kind);
+
+/*
+ * Create virtual registers with specific types in the compilation
+ * context. They are more convenient than the above one.
+ */
+
+static inline JitReg
+jit_cc_new_reg_I32(JitCompContext *cc)
+{
+    return jit_cc_new_reg(cc, JIT_REG_KIND_I32);
+}
+
+static inline JitReg
+jit_cc_new_reg_I64(JitCompContext *cc)
+{
+    return jit_cc_new_reg(cc, JIT_REG_KIND_I64);
+}
+
+#if UINTPTR_MAX == UINT64_MAX
+#define jit_cc_new_reg_ptr jit_cc_new_reg_I64
+#else
+#define jit_cc_new_reg_ptr jit_cc_new_reg_I32
+#endif
+
+static inline JitReg
+jit_cc_new_reg_F32(JitCompContext *cc)
+{
+    return jit_cc_new_reg(cc, JIT_REG_KIND_F32);
+}
+
+static inline JitReg
+jit_cc_new_reg_F64(JitCompContext *cc)
+{
+    return jit_cc_new_reg(cc, JIT_REG_KIND_F64);
+}
+
+static inline JitReg
+jit_cc_new_reg_V64(JitCompContext *cc)
+{
+    return jit_cc_new_reg(cc, JIT_REG_KIND_V64);
+}
+
+static inline JitReg
+jit_cc_new_reg_V128(JitCompContext *cc)
+{
+    return jit_cc_new_reg(cc, JIT_REG_KIND_V128);
+}
+
+static inline JitReg
+jit_cc_new_reg_V256(JitCompContext *cc)
+{
+    return jit_cc_new_reg(cc, JIT_REG_KIND_V256);
+}
+
+/**
+ * Get the hard register number of the given kind
+ *
+ * @param cc the compilation context
+ * @param kind the register kind
+ *
+ * @return number of hard registers of the given kind
+ */
+static inline unsigned
+jit_cc_hreg_num(JitCompContext *cc, unsigned kind)
+{
+    bh_assert(kind < JIT_REG_KIND_L32);
+    return cc->hreg_info->info[kind].num;
+}
+
+/**
+ * Check whether a given register is a hard register.
+ *
+ * @param cc the compilation context
+ * @param reg the register which must be a variable
+ *
+ * @return true if the register is a hard register
+ */
+static inline bool
+jit_cc_is_hreg(JitCompContext *cc, JitReg reg)
+{
+    unsigned kind = jit_reg_kind(reg);
+    unsigned no = jit_reg_no(reg);
+    bh_assert(jit_reg_is_variable(reg));
+    return no < cc->hreg_info->info[kind].num;
+}
+
+/**
+ * Check whether the given hard register is fixed.
+ *
+ * @param cc the compilation context
+ * @param reg the hard register
+ *
+ * @return true if the hard register is fixed
+ */
+static inline bool
+jit_cc_is_hreg_fixed(JitCompContext *cc, JitReg reg)
+{
+    unsigned kind = jit_reg_kind(reg);
+    unsigned no = jit_reg_no(reg);
+    bh_assert(jit_cc_is_hreg(cc, reg));
+    return !!cc->hreg_info->info[kind].fixed[no];
+}
+
+/**
+ * Check whether the given hard register is caller-saved-native.
+ *
+ * @param cc the compilation context
+ * @param reg the hard register
+ *
+ * @return true if the hard register is caller-saved-native
+ */
+static inline bool
+jit_cc_is_hreg_caller_saved_native(JitCompContext *cc, JitReg reg)
+{
+    unsigned kind = jit_reg_kind(reg);
+    unsigned no = jit_reg_no(reg);
+    bh_assert(jit_cc_is_hreg(cc, reg));
+    return !!cc->hreg_info->info[kind].caller_saved_native[no];
+}
+
+/**
+ * Check whether the given hard register is caller-saved-jitted.
+ *
+ * @param cc the compilation context
+ * @param reg the hard register
+ *
+ * @return true if the hard register is caller-saved-jitted
+ */
+static inline bool
+jit_cc_is_hreg_caller_saved_jitted(JitCompContext *cc, JitReg reg)
+{
+    unsigned kind = jit_reg_kind(reg);
+    unsigned no = jit_reg_no(reg);
+    bh_assert(jit_cc_is_hreg(cc, reg));
+    return !!cc->hreg_info->info[kind].caller_saved_jitted[no];
+}
+
+/**
+ * Return the entry block of the compilation context.
+ *
+ * @param cc the compilation context
+ *
+ * @return the entry block of the compilation context
+ */
+static inline JitBasicBlock *
+jit_cc_entry_basic_block(JitCompContext *cc)
+{
+    return *(jit_annl_basic_block(cc, cc->entry_label));
+}
+
+/**
+ * Return the exit block of the compilation context.
+ *
+ * @param cc the compilation context
+ *
+ * @return the exit block of the compilation context
+ */
+static inline JitBasicBlock *
+jit_cc_exit_basic_block(JitCompContext *cc)
+{
+    return *(jit_annl_basic_block(cc, cc->exit_label));
+}
+
+void
+jit_value_stack_push(JitValueStack *stack, JitValue *value);
+
+JitValue *
+jit_value_stack_pop(JitValueStack *stack);
+
+void
+jit_value_stack_destroy(JitValueStack *stack);
+
+JitBlock *
+jit_block_stack_top(JitBlockStack *stack);
+
+void
+jit_block_stack_push(JitBlockStack *stack, JitBlock *block);
+
+JitBlock *
+jit_block_stack_pop(JitBlockStack *stack);
+
+void
+jit_block_stack_destroy(JitBlockStack *stack);
+
+bool
+jit_block_add_incoming_insn(JitBlock *block, JitInsn *insn, uint32 opnd_idx);
+
+void
+jit_block_destroy(JitBlock *block);
+
+bool
+jit_cc_push_value(JitCompContext *cc, uint8 type, JitReg value);
+
+bool
+jit_cc_pop_value(JitCompContext *cc, uint8 type, JitReg *p_value);
+
+bool
+jit_lock_reg_in_insn(JitCompContext *cc, JitInsn *the_insn, JitReg reg_to_lock);
+
+/**
+ * Update the control flow graph after successors of blocks are
+ * changed so that the predecessor vector of each block represents the
+ * updated status. The predecessors may not be required by all
+ * passes, so we don't keep them up to date at all times.
+ *
+ * @param cc the compilation context
+ *
+ * @return true if succeeds, false otherwise
+ */
+bool
+jit_cc_update_cfg(JitCompContext *cc);
+
+/**
+ * Visit each normal block (which is not entry nor exit block) in a
+ * compilation context. New blocks can be added in the loop body, but
+ * they won't be visited. Blocks can also be removed safely (by
+ * setting the label's block annotation to NULL) in the loop body.
+ *
+ * @param CC (JitCompContext *) the compilation context
+ * @param I (unsigned) index variable of the block (label no)
+ * @param E (unsigned) end index variable of block (last index + 1)
+ * @param B (JitBasicBlock *) block pointer variable
+ */
+#define JIT_FOREACH_BLOCK(CC, I, E, B)                           \
+    for ((I) = 2, (E) = (CC)->_ann._label_num; (I) < (E); (I)++) \
+        if (((B) = (CC)->_ann._label_basic_block[(I)]))
+
+/**
+ * The version that includes entry and exit block.
+ */
+#define JIT_FOREACH_BLOCK_ENTRY_EXIT(CC, I, E, B)                \
+    for ((I) = 0, (E) = (CC)->_ann._label_num; (I) < (E); (I)++) \
+        if (((B) = (CC)->_ann._label_basic_block[(I)]))
+
+/**
+ * Visit each normal block (which is not entry nor exit block) in a
+ * compilation context in reverse order. New blocks can be added in
+ * the loop body, but they won't be visited. Blocks can also be
+ * removed safely (by setting the label's block annotation to NULL) in
+ * the loop body.
+ *
+ * @param CC (JitCompContext *) the compilation context
+ * @param I (unsigned) index of the block (label no)
+ * @param B (JitBasicBlock *) block pointer
+ */
+#define JIT_FOREACH_BLOCK_REVERSE(CC, I, B)           \
+    for ((I) = (CC)->_ann._label_num; (I) > 2; (I)--) \
+        if (((B) = (CC)->_ann._label_basic_block[(I)-1]))
+
+/**
+ * The version that includes entry and exit block.
+ */
+#define JIT_FOREACH_BLOCK_REVERSE_ENTRY_EXIT(CC, I, B) \
+    for ((I) = (CC)->_ann._label_num; (I) > 0; (I)--)  \
+        if (((B) = (CC)->_ann._label_basic_block[(I)-1]))
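+
+/*
+ * Illustrative sketch (kept out of compilation): visiting all normal
+ * blocks.  Labels 0 and 1 (entry and exit) are skipped by the macro
+ * itself.
+ */
+#if 0
+static void
+visit_normal_blocks(JitCompContext *cc)
+{
+    JitBasicBlock *block;
+    unsigned i, end;
+
+    JIT_FOREACH_BLOCK(cc, i, end, block)
+    {
+        /* process block */
+    }
+}
+#endif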
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* end of _JIT_IR_H_ */

+ 840 - 0
core/iwasm/fast-jit/jit_regalloc.c

@@ -0,0 +1,840 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_utils.h"
+#include "jit_compiler.h"
+
+#if BH_DEBUG != 0
+#define VREG_DEF_SANITIZER
+#endif
+
+/**
+ * A uint32 stack for storing distances of occurrences of virtual
+ * registers.
+ */
+typedef struct UintStack {
+    /* Capacity of the stack.  */
+    uint32 capacity;
+
+    /* Top index of the stack.  */
+    uint32 top;
+
+    /* Elements of the stack.  */
+    uint32 elem[1];
+} UintStack;
+
+static bool
+uint_stack_push(UintStack **stack, unsigned val)
+{
+    unsigned capacity = *stack ? (*stack)->capacity : 0;
+    unsigned top = *stack ? (*stack)->top : 0;
+
+    bh_assert(top <= capacity);
+
+    if (top == capacity) {
+        const unsigned elem_size = sizeof((*stack)->elem[0]);
+        unsigned new_capacity = capacity ? capacity + capacity / 2 : 4;
+        UintStack *new_stack =
+            jit_malloc(offsetof(UintStack, elem) + elem_size * new_capacity);
+
+        if (!new_stack)
+            return false;
+
+        new_stack->capacity = new_capacity;
+        new_stack->top = top;
+
+        if (*stack)
+            memcpy(new_stack->elem, (*stack)->elem, elem_size * top);
+
+        jit_free(*stack);
+        *stack = new_stack;
+    }
+
+    (*stack)->elem[(*stack)->top++] = val;
+
+    return true;
+}
+
+static int
+uint_stack_top(UintStack *stack)
+{
+    return stack->elem[stack->top - 1];
+}
+
+static void
+uint_stack_delete(UintStack **stack)
+{
+    jit_free(*stack);
+    *stack = NULL;
+}
+
+static void
+uint_stack_pop(UintStack **stack)
+{
+    bh_assert((*stack)->top > 0);
+
+    /**
+     * TODO: an empty distances stack means that no instruction uses the
+     * current JitReg anymore, so shall we release the HardReg and clean
+     * the VirtualReg information?
+     */
+    if (--(*stack)->top == 0)
+        uint_stack_delete(stack);
+}
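+
+/*
+ * Illustrative usage (hypothetical values): distances are pushed in
+ * ascending order while a block is scanned forward, so the top of the
+ * stack is the latest remaining occurrence:
+ *
+ *   UintStack *distances = NULL;
+ *   uint_stack_push(&distances, 3);   // first occurrence
+ *   uint_stack_push(&distances, 7);   // later occurrence
+ *   uint_stack_top(distances);        // == 7
+ *   uint_stack_pop(&distances);       // top becomes 3
+ *   uint_stack_pop(&distances);       // empty: stack freed, set to NULL
+ */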
+
+/**
+ * Information of a virtual register.
+ */
+typedef struct VirtualReg {
+    /* The hard register allocated to this virtual register.  */
+    JitReg hreg;
+
+    /* The spill slot allocated to this virtual register.  */
+    JitReg slot;
+
+    /* The hard register allocated to global virtual registers.  It is 0
+       for local registers, whose lifetime is within one basic block.  */
+    JitReg global_hreg;
+
+    /* Distances from the beginning of basic block of all occurrences of the
+       virtual register in the basic block.  */
+    UintStack *distances;
+} VirtualReg;
+
+/**
+ * Information of a hard register.
+ */
+typedef struct HardReg {
+    /* The virtual register this hard register is allocated to.  */
+    JitReg vreg;
+} HardReg;
+
+/**
+ * Information of a spill slot.
+ */
+typedef struct SpillSlot {
+    /* The virtual register this spill slot is allocated to.  */
+    JitReg vreg;
+} SpillSlot;
+
+typedef struct RegallocContext {
+    /* The compiler context.  */
+    JitCompContext *cc;
+
+    /* Information of virtual registers.  The register allocation must
+       not increase the virtual register number during the allocation
+       process.  */
+    VirtualReg *vregs[JIT_REG_KIND_L32];
+
+    /* Information of hard registers. */
+    HardReg *hregs[JIT_REG_KIND_L32];
+
+    /* Number of elements in the spill_slots array.  */
+    uint32 spill_slot_num;
+
+    /* Information of spill slots.  */
+    SpillSlot *spill_slots;
+
+    /* The last define-released hard register.  */
+    JitReg last_def_released_hreg;
+} RegallocContext;
+
+/**
+ * Get the VirtualReg structure of the given virtual register.
+ *
+ * @param rc the regalloc context
+ * @param vreg the virtual register
+ *
+ * @return the VirtualReg structure of the given virtual register
+ */
+static VirtualReg *
+rc_get_vr(RegallocContext *rc, JitReg vreg)
+{
+    unsigned kind = jit_reg_kind(vreg);
+    unsigned no = jit_reg_no(vreg);
+
+    bh_assert(jit_reg_is_variable(vreg));
+
+    return &rc->vregs[kind][no];
+}
+
+/**
+ * Get the HardReg structure of the given hard register.
+ *
+ * @param rc the regalloc context
+ * @param hreg the hard register
+ *
+ * @return the HardReg structure of the given hard register
+ */
+static HardReg *
+rc_get_hr(RegallocContext *rc, JitReg hreg)
+{
+    unsigned kind = jit_reg_kind(hreg);
+    unsigned no = jit_reg_no(hreg);
+
+    bh_assert(jit_reg_is_variable(hreg) && jit_cc_is_hreg(rc->cc, hreg));
+
+    return &rc->hregs[kind][no];
+}
+
+/**
+ * Get the SpillSlot structure of the given slot.
+ *
+ * @param rc the regalloc context
+ * @param slot the constant register representing the slot index
+ *
+ * @return the SpillSlot of the given slot
+ */
+static SpillSlot *
+rc_get_spill_slot(RegallocContext *rc, JitReg slot)
+{
+    unsigned index = jit_cc_get_const_I32(rc->cc, slot);
+
+    bh_assert(index < rc->spill_slot_num);
+
+    return &rc->spill_slots[index];
+}
+
+/**
+ * Get the stride in the spill slots of the register.
+ *
+ * @param reg a virtual register
+ *
+ * @return stride in the spill slots
+ */
+static unsigned
+get_reg_stride(JitReg reg)
+{
+    static const uint8 strides[] = { 0, 1, 2, 1, 2, 2, 4, 8, 0 };
+    return strides[jit_reg_kind(reg)];
+}
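+
+/*
+ * For example, with 4-byte spill slots and the JIT_REG_KIND_* ordering
+ * assumed by the table above: I32/F32 registers take 1 slot, I64/F64
+ * and V64 registers take 2, V128 takes 4 and V256 takes 8.
+ */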
+
+/**
+ * Allocate a spill slot for the given virtual register.
+ *
+ * @param rc the regalloc context
+ * @param vreg the virtual register
+ *
+ * @return the spill slot encoded in a constant register
+ */
+static JitReg
+rc_alloc_spill_slot(RegallocContext *rc, JitReg vreg)
+{
+    const unsigned stride = get_reg_stride(vreg);
+    unsigned mask, new_num, i, j;
+    SpillSlot *slots;
+
+    bh_assert(stride > 0);
+
+    for (i = 0; i < rc->spill_slot_num; i += stride)
+        for (j = i;; j++) {
+            if (j == i + stride)
+                /* Found a free slot for vreg.  */
+                goto found;
+
+            if (rc->spill_slots[j].vreg)
+                break;
+        }
+
+    /* No free slot, increase the slot number.  */
+    mask = stride - 1;
+    /* Align the slot index.  */
+    i = (rc->spill_slot_num + mask) & ~mask;
+    new_num = i == 0 ? 32 : i + i / 2;
+
+    if (!(slots = jit_calloc(sizeof(*slots) * new_num)))
+        return 0;
+
+    if (rc->spill_slots)
+        memcpy(slots, rc->spill_slots, sizeof(*slots) * rc->spill_slot_num);
+
+    jit_free(rc->spill_slots);
+    rc->spill_slots = slots;
+    rc->spill_slot_num = new_num;
+
+found:
+    /* Now, i is the first slot for vreg.  */
+    if ((i + stride) * 4 > rc->cc->spill_cache_size)
+        /* No frame space for the spill area.  */
+        return 0;
+
+    /* Allocate the slot(s) to vreg.  */
+    for (j = i; j < i + stride; j++)
+        rc->spill_slots[j].vreg = vreg;
+
+    return jit_cc_new_const_I32(rc->cc, i);
+}
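+
+/*
+ * For example, an I64 virtual register (stride 2) is placed by scanning
+ * the slot array in steps of 2 for two consecutive free slots; if none
+ * exist, the array grows (to 32 slots initially, then by half) and the
+ * first aligned index of the new space is used, provided
+ * (i + stride) * 4 still fits within the frame's spill cache.
+ */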
+
+/**
+ * Free a spill slot.
+ *
+ * @param rc the regalloc context
+ * @param slot_reg the constant register representing the slot index
+ */
+static void
+rc_free_spill_slot(RegallocContext *rc, JitReg slot_reg)
+{
+    if (slot_reg) {
+        SpillSlot *slot = rc_get_spill_slot(rc, slot_reg);
+        const JitReg vreg = slot->vreg;
+        const unsigned stride = get_reg_stride(vreg);
+        unsigned i;
+
+        for (i = 0; i < stride; i++)
+            slot[i].vreg = 0;
+    }
+}
+
+static void
+rc_destroy(RegallocContext *rc)
+{
+    unsigned i, j;
+
+    for (i = JIT_REG_KIND_VOID; i < JIT_REG_KIND_L32; i++) {
+        const unsigned vreg_num = jit_cc_reg_num(rc->cc, i);
+
+        if (rc->vregs[i])
+            for (j = 0; j < vreg_num; j++)
+                uint_stack_delete(&rc->vregs[i][j].distances);
+
+        jit_free(rc->vregs[i]);
+        jit_free(rc->hregs[i]);
+    }
+
+    jit_free(rc->spill_slots);
+}
+
+static bool
+rc_init(RegallocContext *rc, JitCompContext *cc)
+{
+    unsigned i, j;
+
+    memset(rc, 0, sizeof(*rc));
+    rc->cc = cc;
+
+    for (i = JIT_REG_KIND_VOID; i < JIT_REG_KIND_L32; i++) {
+        const unsigned vreg_num = jit_cc_reg_num(cc, i);
+        const unsigned hreg_num = jit_cc_hreg_num(cc, i);
+
+        if (vreg_num > 0
+            && !(rc->vregs[i] = jit_calloc(sizeof(VirtualReg) * vreg_num)))
+            goto fail;
+        if (hreg_num > 0
+            && !(rc->hregs[i] = jit_calloc(sizeof(HardReg) * hreg_num)))
+            goto fail;
+
+        /* Hard registers can only be allocated to themselves.  */
+        for (j = 0; j < hreg_num; j++)
+            rc->vregs[i][j].global_hreg = jit_reg_new(i, j);
+    }
+
+    return true;
+
+fail:
+    rc_destroy(rc);
+
+    return false;
+}
+
+/**
+ * Check whether the given register is an allocation candidate, which
+ * must be a variable register that is not fixed hard register.
+ *
+ * @param cc the compilation context
+ * @param reg the register
+ *
+ * @return true if the register is an allocation candidate
+ */
+static bool
+is_alloc_candidate(JitCompContext *cc, JitReg reg)
+{
+    return (jit_reg_is_variable(reg)
+            && (!jit_cc_is_hreg(cc, reg) || !jit_cc_is_hreg_fixed(cc, reg)));
+}
+
+#ifdef VREG_DEF_SANITIZER
+static void
+check_vreg_definition(RegallocContext *rc, JitInsn *insn)
+{
+    JitRegVec regvec = jit_insn_opnd_regs(insn);
+    JitReg *regp, reg_defined = 0;
+    unsigned i, first_use = jit_insn_opnd_first_use(insn);
+
+    /* check that the definition of a vr appears before its references */
+    JIT_REG_VEC_FOREACH(regvec, i, regp)
+    {
+        VirtualReg *vr = NULL;
+
+        if (!is_alloc_candidate(rc->cc, *regp))
+            continue;
+
+        /* a strong assumption that there is only one defined reg */
+        if (i < first_use) {
+            reg_defined = *regp;
+            continue;
+        }
+
+        /**
+         * both definition and references are in one instruction,
+         * like MOV i3, i3
+         */
+        if (reg_defined == *regp)
+            continue;
+
+        vr = rc_get_vr(rc, *regp);
+        bh_assert(vr->distances);
+    }
+}
+#endif
+
+/**
+ * Collect distances from the beginning of basic block of all occurrences of
+ * each virtual register.
+ *
+ * @param rc the regalloc context
+ * @param basic_block the basic block
+ *
+ * @return distance of the end instruction if succeeds, -1 otherwise
+ */
+static int
+collect_distances(RegallocContext *rc, JitBasicBlock *basic_block)
+{
+    JitInsn *insn;
+    int distance = 1;
+
+    JIT_FOREACH_INSN(basic_block, insn)
+    {
+        JitRegVec regvec = jit_insn_opnd_regs(insn);
+        unsigned i;
+        JitReg *regp;
+
+#ifdef VREG_DEF_SANITIZER
+        check_vreg_definition(rc, insn);
+#endif
+
+        /* NOTE: the distance may be pushed more than once if the
+           virtual register occurs multiple times in the
+           instruction.  */
+        JIT_REG_VEC_FOREACH(regvec, i, regp)
+        if (is_alloc_candidate(rc->cc, *regp))
+            if (!uint_stack_push(&(rc_get_vr(rc, *regp))->distances, distance))
+                return -1;
+
+        /* Integer overflow check, normally it won't happen, but
+           we had better add the check here */
+        if (distance >= INT32_MAX)
+            return -1;
+
+        distance++;
+    }
+
+    return distance;
+}
+
+static JitReg
+offset_of_spill_slot(JitCompContext *cc, JitReg slot)
+{
+    return jit_cc_new_const_I32(cc, cc->spill_cache_offset
+                                        + jit_cc_get_const_I32(cc, slot) * 4);
+}
+
+/**
+ * Reload the virtual register from memory.  Reload instruction will
+ * be inserted after the given instruction.
+ *
+ * @param rc the regalloc context
+ * @param vreg the virtual register to be reloaded
+ * @param cur_insn the current instruction after which the reload
+ * instruction will be inserted
+ *
+ * @return the reload instruction if succeeds, NULL otherwise
+ */
+static JitInsn *
+reload_vreg(RegallocContext *rc, JitReg vreg, JitInsn *cur_insn)
+{
+    VirtualReg *vr = rc_get_vr(rc, vreg);
+    HardReg *hr = rc_get_hr(rc, vr->hreg);
+    JitInsn *insn = NULL;
+
+    if (vreg == rc->cc->exec_env_reg)
+        /* Reload exec_env_reg with LDEXECENV.  */
+        insn = jit_cc_new_insn(rc->cc, LDEXECENV, vr->hreg);
+    else
+    /* Allocate spill slot if not yet and reload from there.  */
+    {
+        JitReg fp_reg = rc->cc->fp_reg, offset;
+
+        if (!vr->slot && !(vr->slot = rc_alloc_spill_slot(rc, vreg)))
+            /* Cannot allocate spill slot (due to OOM or frame size limit).  */
+            return NULL;
+
+        offset = offset_of_spill_slot(rc->cc, vr->slot);
+
+        switch (jit_reg_kind(vreg)) {
+            case JIT_REG_KIND_I32:
+                insn = jit_cc_new_insn(rc->cc, LDI32, vr->hreg, fp_reg, offset);
+                break;
+            case JIT_REG_KIND_I64:
+                insn = jit_cc_new_insn(rc->cc, LDI64, vr->hreg, fp_reg, offset);
+                break;
+            case JIT_REG_KIND_F32:
+                insn = jit_cc_new_insn(rc->cc, LDF32, vr->hreg, fp_reg, offset);
+                break;
+            case JIT_REG_KIND_F64:
+                insn = jit_cc_new_insn(rc->cc, LDF64, vr->hreg, fp_reg, offset);
+                break;
+            case JIT_REG_KIND_V64:
+                insn = jit_cc_new_insn(rc->cc, LDV64, vr->hreg, fp_reg, offset);
+                break;
+            case JIT_REG_KIND_V128:
+                insn =
+                    jit_cc_new_insn(rc->cc, LDV128, vr->hreg, fp_reg, offset);
+                break;
+            case JIT_REG_KIND_V256:
+                insn =
+                    jit_cc_new_insn(rc->cc, LDV256, vr->hreg, fp_reg, offset);
+                break;
+            default:
+                bh_assert(0);
+        }
+    }
+
+    if (insn)
+        jit_insn_insert_after(cur_insn, insn);
+
+    bh_assert(hr->vreg == vreg);
+    hr->vreg = vr->hreg = 0;
+
+    return insn;
+}
+
+/**
+ * Spill the virtual register (which cannot be exec_env_reg) to memory.
+ * Spill instruction will be inserted after the given instruction.
+ *
+ * @param rc the regalloc context
+ * @param vreg the virtual register to be spilled
+ * @param cur_insn the current instruction after which the spill
+ * instruction will be inserted
+ *
+ * @return the spill instruction if succeeds, NULL otherwise
+ */
+static JitInsn *
+spill_vreg(RegallocContext *rc, JitReg vreg, JitInsn *cur_insn)
+{
+    VirtualReg *vr = rc_get_vr(rc, vreg);
+    JitReg fp_reg = rc->cc->fp_reg, offset;
+    JitInsn *insn;
+
+    /* There is no chance to spill exec_env_reg.  */
+    bh_assert(vreg != rc->cc->exec_env_reg);
+    bh_assert(vr->hreg && vr->slot);
+    offset = offset_of_spill_slot(rc->cc, vr->slot);
+
+    switch (jit_reg_kind(vreg)) {
+        case JIT_REG_KIND_I32:
+            insn = jit_cc_new_insn(rc->cc, STI32, vr->hreg, fp_reg, offset);
+            break;
+        case JIT_REG_KIND_I64:
+            insn = jit_cc_new_insn(rc->cc, STI64, vr->hreg, fp_reg, offset);
+            break;
+        case JIT_REG_KIND_F32:
+            insn = jit_cc_new_insn(rc->cc, STF32, vr->hreg, fp_reg, offset);
+            break;
+        case JIT_REG_KIND_F64:
+            insn = jit_cc_new_insn(rc->cc, STF64, vr->hreg, fp_reg, offset);
+            break;
+        case JIT_REG_KIND_V64:
+            insn = jit_cc_new_insn(rc->cc, STV64, vr->hreg, fp_reg, offset);
+            break;
+        case JIT_REG_KIND_V128:
+            insn = jit_cc_new_insn(rc->cc, STV128, vr->hreg, fp_reg, offset);
+            break;
+        case JIT_REG_KIND_V256:
+            insn = jit_cc_new_insn(rc->cc, STV256, vr->hreg, fp_reg, offset);
+            break;
+        default:
+            bh_assert(0);
+            return NULL;
+    }
+
+    if (insn)
+        jit_insn_insert_after(cur_insn, insn);
+
+    return insn;
+}
+
+/**
+ * Allocate a hard register for the virtual register.  Necessary
+ * reload instructions will be inserted after the given instruction.
+ *
+ * @param rc the regalloc context
+ * @param vreg the virtual register
+ * @param insn the instruction after which the reload instructions will
+ * be inserted
+ * @param distance the distance of the current instruction
+ *
+ * @return the hard register allocated if succeeds, 0 otherwise
+ */
+static JitReg
+allocate_hreg(RegallocContext *rc, JitReg vreg, JitInsn *insn, int distance)
+{
+    const int kind = jit_reg_kind(vreg);
+    const HardReg *hregs = rc->hregs[kind];
+    const unsigned hreg_num = jit_cc_hreg_num(rc->cc, kind);
+    JitReg hreg, vreg_to_reload = 0;
+    int min_distance = distance, vr_distance;
+    VirtualReg *vr = rc_get_vr(rc, vreg);
+    unsigned i;
+
+    if (hreg_num == 0)
+    /* Unsupported hard register kind.  */
+    {
+        jit_set_last_error(rc->cc, "unsupported hard register kind");
+        return 0;
+    }
+
+    if (vr->global_hreg)
+    /* It has a globally allocated register, we can only use it.  */
+    {
+        if ((vreg_to_reload = (rc_get_hr(rc, vr->global_hreg))->vreg))
+            if (!reload_vreg(rc, vreg_to_reload, insn))
+                return 0;
+
+        return vr->global_hreg;
+    }
+
+    /* Use the last define-released register if its kind is correct and
+       it's free so as to optimize for two-operand instructions.  */
+    if (jit_reg_kind(rc->last_def_released_hreg) == kind
+        && (rc_get_hr(rc, rc->last_def_released_hreg))->vreg == 0)
+        return rc->last_def_released_hreg;
+
+    /* No hint given, just try to pick any free register.  */
+    for (i = 0; i < hreg_num; i++) {
+        hreg = jit_reg_new(kind, i);
+
+        if (jit_cc_is_hreg_fixed(rc->cc, hreg))
+            continue;
+
+        if (hregs[i].vreg == 0)
+            /* Found a free one, return it.  */
+            return hreg;
+    }
+
+    /* No free registers, need to spill and reload one.  */
+    for (i = 0; i < hreg_num; i++) {
+        if (jit_cc_is_hreg_fixed(rc->cc, jit_reg_new(kind, i)))
+            continue;
+
+        vr = rc_get_vr(rc, hregs[i].vreg);
+        /* TODO: since hregs[i] is in use, its distances should be valid */
+        vr_distance = vr->distances ? uint_stack_top(vr->distances) : 0;
+
+        if (vr_distance < min_distance) {
+            min_distance = vr_distance;
+            vreg_to_reload = hregs[i].vreg;
+            hreg = jit_reg_new(kind, i);
+        }
+    }
+
+    bh_assert(min_distance < distance);
+
+    if (!reload_vreg(rc, vreg_to_reload, insn))
+        return 0;
+
+    return hreg;
+}
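
When no register of the required kind is free, the loop above evicts the in-use register with the smallest remaining `distance`. Because this pass scans instructions in reverse, the smallest distance corresponds to the next forward use that is furthest away, i.e. the classic furthest-next-use (Belady) eviction rule. A standalone sketch of the same selection logic, with made-up names, for illustration:

```c
/* Illustration only: victim selection by furthest next use.
 * next_use_distance[i] holds the distance of register i's nearest
 * remaining occurrence; the smallest value marks the register whose
 * next forward use is furthest away, hence the cheapest to evict. */
static int
pick_victim(const int *next_use_distance, int reg_num, int cur_distance)
{
    int i, victim = -1, min_distance = cur_distance;

    for (i = 0; i < reg_num; i++) {
        if (next_use_distance[i] < min_distance) {
            min_distance = next_use_distance[i];
            victim = i;
        }
    }
    /* -1 means every candidate is used at the current distance;
       the real pass asserts this cannot happen. */
    return victim;
}
```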
+
+/**
+ * Allocate a hard register for the virtual register if not allocated
+ * yet.  Necessary spill and reload instructions will be inserted
+ * after the given instruction.  This operation converts the virtual
+ * register's state from state 1 or 3 to state 2.
+ *
+ * @param rc the regalloc context
+ * @param vreg the virtual register
+ * @param insn the instruction after which the spill and reload
+ * instructions will be inserted
+ * @param distance the distance of the current instruction
+ *
+ * @return the hard register allocated to the virtual register if
+ * succeeds, 0 otherwise
+ */
+static JitReg
+allocate_for_vreg(RegallocContext *rc, JitReg vreg, JitInsn *insn, int distance)
+{
+    VirtualReg *vr = rc_get_vr(rc, vreg);
+
+    if (vr->hreg)
+        /* It already has a hard register, reuse it.  */
+        return vr->hreg;
+
+    /* Not allocated yet.  */
+    if ((vr->hreg = allocate_hreg(rc, vreg, insn, distance)))
+        (rc_get_hr(rc, vr->hreg))->vreg = vreg;
+
+    return vr->hreg;
+}
+
+/**
+ * Clobber live registers.
+ *
+ * @param rc the regalloc context
+ * @param is_native whether it's native ABI or JITed ABI
+ * @param insn the instruction after which the reload instructions
+ * will be inserted
+ *
+ * @return true if succeeds, false otherwise
+ */
+static bool
+clobber_live_regs(RegallocContext *rc, bool is_native, JitInsn *insn)
+{
+    unsigned i, j;
+
+    for (i = JIT_REG_KIND_VOID; i < JIT_REG_KIND_L32; i++) {
+        const unsigned hreg_num = jit_cc_hreg_num(rc->cc, i);
+
+        for (j = 0; j < hreg_num; j++) {
+            JitReg hreg = jit_reg_new(i, j);
+            bool caller_saved =
+                (is_native ? jit_cc_is_hreg_caller_saved_native(rc->cc, hreg)
+                           : jit_cc_is_hreg_caller_saved_jitted(rc->cc, hreg));
+
+            if (caller_saved && rc->hregs[i][j].vreg)
+                if (!reload_vreg(rc, rc->hregs[i][j].vreg, insn))
+                    return false;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Do local register allocation for the given basic block
+ *
+ * @param rc the regalloc context
+ * @param basic_block the basic block
+ * @param distance the distance of the last instruction of the basic block
+ *
+ * @return true if succeeds, false otherwise
+ */
+static bool
+allocate_for_basic_block(RegallocContext *rc, JitBasicBlock *basic_block,
+                         int distance)
+{
+    JitInsn *insn;
+
+    JIT_FOREACH_INSN_REVERSE(basic_block, insn)
+    {
+        JitRegVec regvec = jit_insn_opnd_regs(insn);
+        unsigned first_use = jit_insn_opnd_first_use(insn);
+        unsigned i;
+        JitReg *regp;
+
+        distance--;
+
+        JIT_REG_VEC_FOREACH_DEF(regvec, i, regp, first_use)
+        if (is_alloc_candidate(rc->cc, *regp)) {
+            const JitReg vreg = *regp;
+            VirtualReg *vr = rc_get_vr(rc, vreg);
+
+            if (!(*regp = allocate_for_vreg(rc, vreg, insn, distance)))
+                return false;
+
+            /* Spill the register if required.  */
+            if (vr->slot && !spill_vreg(rc, vreg, insn))
+                return false;
+
+            bh_assert(uint_stack_top(vr->distances) == distance);
+            uint_stack_pop(&vr->distances);
+            /* Record the define-released hard register.  */
+            rc->last_def_released_hreg = vr->hreg;
+            /* Release the hreg and spill slot. */
+            rc_free_spill_slot(rc, vr->slot);
+            (rc_get_hr(rc, vr->hreg))->vreg = 0;
+            vr->hreg = vr->slot = 0;
+        }
+
+        if (insn->opcode == JIT_OP_CALLBC) {
+            if (!clobber_live_regs(rc, false, insn))
+                return false;
+
+            /* The exec_env_reg is implicitly used by the callee.  */
+            if (!allocate_for_vreg(rc, rc->cc->exec_env_reg, insn, distance))
+                return false;
+        }
+        else if (insn->opcode == JIT_OP_CALLNATIVE) {
+            if (!clobber_live_regs(rc, true, insn))
+                return false;
+        }
+
+        JIT_REG_VEC_FOREACH_USE(regvec, i, regp, first_use)
+        if (is_alloc_candidate(rc->cc, *regp)) {
+            if (!allocate_for_vreg(rc, *regp, insn, distance))
+                return false;
+        }
+
+        JIT_REG_VEC_FOREACH_USE(regvec, i, regp, first_use)
+        if (is_alloc_candidate(rc->cc, *regp)) {
+            VirtualReg *vr = rc_get_vr(rc, *regp);
+            bh_assert(uint_stack_top(vr->distances) == distance);
+            uint_stack_pop(&vr->distances);
+            /* be sure that the hreg exists and hasn't been spilled out */
+            bh_assert(vr->hreg != 0);
+            *regp = vr->hreg;
+        }
+    }
+
+    return true;
+}
+
+bool
+jit_pass_regalloc(JitCompContext *cc)
+{
+    RegallocContext rc = { 0 };
+    unsigned label_index, end_label_index;
+    JitBasicBlock *basic_block;
+    VirtualReg *self_vr;
+    bool retval = false;
+
+    if (!rc_init(&rc, cc))
+        return false;
+
+    /* NOTE: don't allocate new virtual registers during allocation
+       because the rc->vregs array is fixed size.  */
+
+    /* TODO: allocate hard registers for global virtual registers here.
+       Currently, exec_env_reg is the only global virtual register.  */
+    self_vr = rc_get_vr(&rc, cc->exec_env_reg);
+
+    JIT_FOREACH_BLOCK_ENTRY_EXIT(cc, label_index, end_label_index, basic_block)
+    {
+        int distance;
+
+        /* TODO: initialize hreg for live-out registers.  */
+        self_vr->hreg = self_vr->global_hreg;
+        (rc_get_hr(&rc, cc->exec_env_reg))->vreg = cc->exec_env_reg;
+
+        /**
+         * TODO: the allocation of a basic block keeps using vregs[]
+         * and hregs[] from previous basic block
+         */
+        if ((distance = collect_distances(&rc, basic_block)) < 0)
+            goto cleanup_and_return;
+
+        if (!allocate_for_basic_block(&rc, basic_block, distance))
+            goto cleanup_and_return;
+
+        /* TODO: generate necessary spills for live-in registers.  */
+    }
+
+    retval = true;
+
+cleanup_and_return:
+    rc_destroy(&rc);
+
+    return retval;
+}

+ 19 - 0
core/iwasm/fast-jit/jit_utils.c

@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_utils.h"
+
+JitBitmap *
+jit_bitmap_new(uintptr_t begin_index, unsigned bitnum)
+{
+    JitBitmap *bitmap;
+
+    if ((bitmap = jit_calloc(offsetof(JitBitmap, map) + (bitnum + 7) / 8))) {
+        bitmap->begin_index = begin_index;
+        bitmap->end_index = begin_index + bitnum;
+    }
+
+    return bitmap;
+}

+ 136 - 0
core/iwasm/fast-jit/jit_utils.h

@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2021 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _JIT_UTILS_H_
+#define _JIT_UTILS_H_
+
+#include "bh_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * A simple fixed size bitmap.
+ */
+typedef struct JitBitmap {
+    /* The first valid bit index.  */
+    uintptr_t begin_index;
+
+    /* The last valid bit index plus one.  */
+    uintptr_t end_index;
+
+    /* The bitmap.  */
+    uint8 map[1];
+} JitBitmap;
+
+static inline void *
+jit_malloc(unsigned int size)
+{
+    return wasm_runtime_malloc(size);
+}
+
+static inline void *
+jit_calloc(unsigned int size)
+{
+    void *ret = wasm_runtime_malloc(size);
+    if (ret) {
+        memset(ret, 0, size);
+    }
+    return ret;
+}
+
+static inline void
+jit_free(void *ptr)
+{
+    if (ptr)
+        wasm_runtime_free(ptr);
+}
+
+/**
+ * Create a new bitmap.
+ *
+ * @param begin_index the first valid bit index
+ * @param bitnum the number of bits in the bitmap
+ *
+ * @return the new bitmap if succeeds, NULL otherwise.
+ */
+JitBitmap *
+jit_bitmap_new(uintptr_t begin_index, unsigned bitnum);
+
+/**
+ * Delete a bitmap.
+ *
+ * @param bitmap the bitmap to be deleted
+ */
+static inline void
+jit_bitmap_delete(JitBitmap *bitmap)
+{
+    jit_free(bitmap);
+}
+
+/**
+ * Check whether the given index is in the range of the bitmap.
+ *
+ * @param bitmap the bitmap
+ * @param n the bit index
+ *
+ * @return true if the index is in range, false otherwise
+ */
+static inline bool
+jit_bitmap_is_in_range(JitBitmap *bitmap, unsigned n)
+{
+    return n >= bitmap->begin_index && n < bitmap->end_index;
+}
+
+/**
+ * Get a bit in the bitmap.
+ *
+ * @param bitmap the bitmap
+ * @param n the n-th bit to get
+ *
+ * @return the value of the bit
+ */
+static inline int
+jit_bitmap_get_bit(JitBitmap *bitmap, unsigned n)
+{
+    unsigned idx = n - bitmap->begin_index;
+    bh_assert(n >= bitmap->begin_index && n < bitmap->end_index);
+    return (bitmap->map[idx / 8] >> (idx % 8)) & 1;
+}
+
+/**
+ * Set a bit in the bitmap.
+ *
+ * @param bitmap the bitmap
+ * @param n the n-th bit to be set
+ */
+static inline void
+jit_bitmap_set_bit(JitBitmap *bitmap, unsigned n)
+{
+    unsigned idx = n - bitmap->begin_index;
+    bh_assert(n >= bitmap->begin_index && n < bitmap->end_index);
+    bitmap->map[idx / 8] |= 1 << (idx % 8);
+}
+
+/**
+ * Clear a bit in the bitmap.
+ *
+ * @param bitmap the bitmap
+ * @param n the n-th bit to be cleared
+ */
+static inline void
+jit_bitmap_clear_bit(JitBitmap *bitmap, unsigned n)
+{
+    unsigned idx = n - bitmap->begin_index;
+    bh_assert(n >= bitmap->begin_index && n < bitmap->end_index);
+    bitmap->map[idx / 8] &= ~(1 << (idx % 8));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
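
A minimal usage sketch of the bitmap helpers declared above; the function and indices are made up for the example:

```c
/* Illustration only: exercising the JitBitmap API declared above. */
#include "jit_utils.h"

static bool
track_used_slot(void)
{
    /* 128 bits starting at index 0 */
    JitBitmap *used = jit_bitmap_new(0, 128);

    if (!used)
        return false;

    jit_bitmap_set_bit(used, 5);
    bh_assert(jit_bitmap_get_bit(used, 5) == 1);
    jit_bitmap_clear_bit(used, 5);
    bh_assert(jit_bitmap_get_bit(used, 5) == 0);
    bh_assert(jit_bitmap_is_in_range(used, 127));
    bh_assert(!jit_bitmap_is_in_range(used, 128));

    jit_bitmap_delete(used);
    return true;
}
```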

+ 3 - 0
core/iwasm/include/wasm_export.h

@@ -139,6 +139,9 @@ typedef struct RuntimeInitArgs {
     char ip_addr[128];
     int platform_port;
     int instance_port;
+
+    /* Fast JIT code cache size */
+    uint32_t fast_jit_code_cache_size;
 } RuntimeInitArgs;
 
 #ifndef WASM_VALKIND_T_DEFINED
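
For embedders, the new field is filled in before runtime initialization. A minimal sketch, assuming the usual `wasm_runtime_full_init()` entry point and the system allocator; the cache size here is illustrative:

```c
/* Minimal embedder sketch (illustration only): sizing the Fast JIT
 * code cache through RuntimeInitArgs. */
#include "wasm_export.h"
#include <string.h>

static bool
init_runtime_with_fast_jit_cache(void)
{
    RuntimeInitArgs init_args;

    memset(&init_args, 0, sizeof(RuntimeInitArgs));
    init_args.mem_alloc_type = Alloc_With_System_Allocator;
    /* 10 MB of code cache for JITed code (illustrative size) */
    init_args.fast_jit_code_cache_size = 10 * 1024 * 1024;

    return wasm_runtime_full_init(&init_args);
}
```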

+ 21 - 1
core/iwasm/interpreter/wasm.h

@@ -204,6 +204,10 @@ typedef struct WASMGlobalImport {
     WASMModule *import_module;
     WASMGlobal *import_global_linked;
 #endif
+#if WASM_ENABLE_FAST_JIT != 0
+    /* The data offset of current global in global data */
+    uint32 data_offset;
+#endif
 } WASMGlobalImport;
 
 typedef struct WASMImport {
@@ -254,12 +258,19 @@ struct WASMFunction {
     uint8 *consts;
     uint32 const_cell_num;
 #endif
+#if WASM_ENABLE_FAST_JIT != 0
+    void *fast_jit_jitted_code;
+#endif
 };
 
 struct WASMGlobal {
     uint8 type;
     bool is_mutable;
     InitializerExpression init_expr;
+#if WASM_ENABLE_FAST_JIT != 0
+    /* The data offset of current global in global data */
+    uint32 data_offset;
+#endif
 };
 
 typedef struct WASMExport {
@@ -443,9 +454,12 @@ struct WASMModule {
 #if WASM_ENABLE_DEBUG_INTERP != 0 || WASM_ENABLE_DEBUG_AOT != 0
     bh_list fast_opcode_list;
     uint8 *buf_code;
+    uint64 buf_code_size;
+#endif
+#if WASM_ENABLE_DEBUG_INTERP != 0 || WASM_ENABLE_DEBUG_AOT != 0 \
+    || WASM_ENABLE_FAST_JIT != 0
     uint8 *load_addr;
     uint64 load_size;
-    uint64 buf_code_size;
 #endif
 
 #if WASM_ENABLE_DEBUG_INTERP != 0
@@ -470,6 +484,11 @@ struct WASMModule {
 #if WASM_ENABLE_LOAD_CUSTOM_SECTION != 0
     WASMCustomSection *custom_section_list;
 #endif
+
+#if WASM_ENABLE_FAST_JIT != 0
+    /* point to JITed functions */
+    void **fast_jit_func_ptrs;
+#endif
 };
 
 typedef struct BlockType {
@@ -612,6 +631,7 @@ wasm_get_smallest_type_idx(WASMType **types, uint32 type_count,
         if (wasm_type_equal(types[cur_type_idx], types[i]))
             return i;
     }
+    (void)type_count;
     return cur_type_idx;
 }
 

+ 11 - 6
core/iwasm/interpreter/wasm_interp.h

@@ -26,6 +26,10 @@ typedef struct WASMInterpFrame {
     /* Instruction pointer of the bytecode array.  */
     uint8 *ip;
 
+#if WASM_ENABLE_FAST_JIT != 0
+    uint8 *jitted_return_addr;
+#endif
+
 #if WASM_ENABLE_PERF_PROFILING != 0
     uint64 time_started;
 #endif
@@ -47,12 +51,13 @@ typedef struct WASMInterpFrame {
     WASMBranchBlock *csp_boundary;
     WASMBranchBlock *csp;
 
-    /* Frame data, the layout is:
-       lp: param_cell_count + local_cell_count
-       sp_bottom to sp_boundary: stack of data
-       csp_bottom to csp_boundary: stack of block
-       ref to frame end: data types of local vairables and stack data
-       */
+    /**
+     * Frame data, the layout is:
+     *  lp: parameters and local variables
+     *  sp_bottom to sp_boundary: wasm operand stack
+     *  csp_bottom to csp_boundary: wasm label stack
+     *  jit spill cache: only available for fast jit
+     */
     uint32 lp[1];
 #endif
 } WASMInterpFrame;

+ 66 - 0
core/iwasm/interpreter/wasm_interp_classic.c

@@ -16,6 +16,9 @@
 #include "../libraries/thread-mgr/thread_manager.h"
 #include "../libraries/debug-engine/debug_engine.h"
 #endif
+#if WASM_ENABLE_FAST_JIT != 0
+#include "../fast-jit/jit_compiler.h"
+#endif
 
 typedef int32 CellType_I32;
 typedef int64 CellType_I64;
@@ -855,6 +858,20 @@ wasm_interp_call_func_native(WASMModuleInstance *module_inst,
     wasm_exec_env_set_cur_frame(exec_env, prev_frame);
 }
 
+#if WASM_ENABLE_FAST_JIT != 0
+bool
+jit_invoke_native(WASMExecEnv *exec_env, uint32 func_idx,
+                  WASMInterpFrame *prev_frame)
+{
+    WASMModuleInstance *module_inst =
+        (WASMModuleInstance *)exec_env->module_inst;
+    WASMFunctionInstance *cur_func = module_inst->functions + func_idx;
+
+    wasm_interp_call_func_native(module_inst, exec_env, cur_func, prev_frame);
+    return wasm_get_exception(module_inst) ? false : true;
+}
+#endif
+
 #if WASM_ENABLE_MULTI_MODULE != 0
 static void
 wasm_interp_call_func_bytecode(WASMModuleInstance *module,
@@ -3897,7 +3914,56 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
         }
     }
     else {
+#if WASM_ENABLE_FAST_JIT == 0
         wasm_interp_call_func_bytecode(module_inst, exec_env, function, frame);
+#else
+        JitGlobals *jit_globals = jit_compiler_get_jit_globals();
+        JitInterpSwitchInfo info;
+        WASMType *func_type = function->u.func->func_type;
+        uint8 type = func_type->result_count
+                         ? func_type->types[func_type->param_count]
+                         : VALUE_TYPE_VOID;
+
+#if WASM_ENABLE_REF_TYPES != 0
+        if (type == VALUE_TYPE_EXTERNREF || type == VALUE_TYPE_FUNCREF)
+            type = VALUE_TYPE_I32;
+#endif
+
+        info.out.ret.last_return_type = type;
+        info.frame = frame;
+        frame->jitted_return_addr =
+            (uint8 *)jit_globals->return_to_interp_from_jitted;
+        jit_interp_switch_to_jitted(exec_env, &info,
+                                    function->u.func->fast_jit_jitted_code);
+        if (func_type->result_count) {
+            switch (type) {
+                case VALUE_TYPE_I32:
+                    *(frame->sp - function->ret_cell_num) =
+                        info.out.ret.ival[0];
+                    break;
+                case VALUE_TYPE_I64:
+                    *(frame->sp - function->ret_cell_num) =
+                        info.out.ret.ival[0];
+                    *(frame->sp - function->ret_cell_num + 1) =
+                        info.out.ret.ival[1];
+                    break;
+                case VALUE_TYPE_F32:
+                    *(frame->sp - function->ret_cell_num) =
+                        info.out.ret.fval[0];
+                    break;
+                case VALUE_TYPE_F64:
+                    *(frame->sp - function->ret_cell_num) =
+                        info.out.ret.fval[0];
+                    *(frame->sp - function->ret_cell_num + 1) =
+                        info.out.ret.fval[1];
+                    break;
+                default:
+                    bh_assert(0);
+                    break;
+            }
+        }
+        (void)wasm_interp_call_func_bytecode;
+#endif
     }
 
     /* Output the return value to the caller */

+ 55 - 4
core/iwasm/interpreter/wasm_loader.c

@@ -14,6 +14,10 @@
 #if WASM_ENABLE_DEBUG_INTERP != 0
 #include "../libraries/debug-engine/debug_engine.h"
 #endif
+#if WASM_ENABLE_FAST_JIT != 0
+#include "../fast-jit/jit_compiler.h"
+#include "../fast-jit/jit_codecache.h"
+#endif
 
 /* Read a value of given type from the address pointed to by the given
    pointer and increase the pointer to the position just after the
@@ -2890,6 +2894,28 @@ fail:
     return false;
 }
 
+#if WASM_ENABLE_FAST_JIT != 0
+static void
+calculate_global_data_offset(WASMModule *module)
+{
+    uint32 i, data_offset;
+
+    data_offset = 0;
+    for (i = 0; i < module->import_global_count; i++) {
+        WASMGlobalImport *import_global =
+            &((module->import_globals + i)->u.global);
+        import_global->data_offset = data_offset;
+        data_offset += wasm_value_type_size(import_global->type);
+    }
+
+    for (i = 0; i < module->global_count; i++) {
+        WASMGlobal *global = module->globals + i;
+        global->data_offset = data_offset;
+        data_offset += wasm_value_type_size(global->type);
+    }
+}
+#endif
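
A worked example of the layout `calculate_global_data_offset()` produces, assuming `wasm_value_type_size()` returns 4 for i32/f32 and 8 for i64/f64:

```c
/* Worked example (illustration only) for a module with one imported
 * global and two defined globals:
 *   import global #0: i32 -> data_offset = 0   (size 4)
 *   global #0:        i64 -> data_offset = 4   (size 8)
 *   global #1:        f32 -> data_offset = 12  (size 4)
 * Imported globals are laid out first, so the module's own globals
 * start after them; total global data size is 16 bytes. */
```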
+
 static bool
 wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
                              uint32 cur_func_idx, char *error_buf,
@@ -3277,6 +3303,21 @@ load_from_sections(WASMModule *module, WASMSection *sections,
 #endif
     }
 
+#if WASM_ENABLE_FAST_JIT != 0
+    calculate_global_data_offset(module);
+
+    if (module->function_count
+        && !(module->fast_jit_func_ptrs =
+                 loader_malloc(sizeof(void *) * module->function_count,
+                               error_buf, error_buf_size))) {
+        return false;
+    }
+    if (!jit_compiler_compile_all(module)) {
+        set_error_buf(error_buf, error_buf_size, "fast jit compilation failed");
+        return false;
+    }
+#endif
+
 #if WASM_ENABLE_MEMORY_TRACING != 0
     wasm_runtime_dump_module_mem_consumption((WASMModuleCommon *)module);
 #endif
@@ -3652,7 +3693,7 @@ wasm_loader_load(uint8 *buf, uint32 size,
         return NULL;
     }
 
-#if WASM_ENABLE_DEBUG_INTERP != 0
+#if WASM_ENABLE_DEBUG_INTERP != 0 || WASM_ENABLE_FAST_JIT != 0
     module->load_addr = (uint8 *)buf;
     module->load_size = size;
 #endif
@@ -3800,6 +3841,16 @@ wasm_loader_unload(WASMModule *module)
     wasm_runtime_destroy_custom_sections(module->custom_section_list);
 #endif
 
+#if WASM_ENABLE_FAST_JIT != 0
+    if (module->fast_jit_func_ptrs) {
+        for (i = 0; i < module->function_count; i++) {
+            if (module->fast_jit_func_ptrs[i])
+                jit_code_cache_free(module->fast_jit_func_ptrs[i]);
+        }
+        wasm_runtime_free(module->fast_jit_func_ptrs);
+    }
+#endif
+
     wasm_runtime_free(module);
 }
 
@@ -7584,7 +7635,7 @@ re_scan:
                 PUSH_OFFSET_TYPE(local_type);
 #else
 #if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0) \
-    && (WASM_ENABLE_DEBUG_INTERP == 0)
+    && (WASM_ENABLE_FAST_JIT == 0) && (WASM_ENABLE_DEBUG_INTERP == 0)
                 if (local_offset < 0x80) {
                     *p_org++ = EXT_OP_GET_LOCAL_FAST;
                     if (is_32bit_type(local_type)) {
@@ -7648,7 +7699,7 @@ re_scan:
                 }
 #else
 #if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0) \
-    && (WASM_ENABLE_DEBUG_INTERP == 0)
+    && (WASM_ENABLE_FAST_JIT == 0) && (WASM_ENABLE_DEBUG_INTERP == 0)
                 if (local_offset < 0x80) {
                     *p_org++ = EXT_OP_SET_LOCAL_FAST;
                     if (is_32bit_type(local_type)) {
@@ -7708,7 +7759,7 @@ re_scan:
                                - wasm_value_type_cell_num(local_type)));
 #else
 #if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0) \
-    && (WASM_ENABLE_DEBUG_INTERP == 0)
+    && (WASM_ENABLE_FAST_JIT == 0) && (WASM_ENABLE_DEBUG_INTERP == 0)
                 if (local_offset < 0x80) {
                     *p_org++ = EXT_OP_TEE_LOCAL_FAST;
                     if (is_32bit_type(local_type)) {

+ 37 - 3
core/iwasm/interpreter/wasm_mini_loader.c

@@ -11,6 +11,10 @@
 #include "wasm_runtime.h"
 #include "../common/wasm_native.h"
 #include "../common/wasm_memory.h"
+#if WASM_ENABLE_FAST_JIT != 0
+#include "../fast-jit/jit_compiler.h"
+#include "../fast-jit/jit_codecache.h"
+#endif
 
 /* Read a value of given type from the address pointed to by the given
    pointer and increase the pointer to the position just after the
@@ -2139,6 +2143,18 @@ load_from_sections(WASMModule *module, WASMSection *sections,
         }
     }
 
+#if WASM_ENABLE_FAST_JIT != 0
+    if (!(module->fast_jit_func_ptrs =
+              loader_malloc(sizeof(void *) * module->function_count, error_buf,
+                            error_buf_size))) {
+        return false;
+    }
+    if (!jit_compiler_compile_all(module)) {
+        set_error_buf(error_buf, error_buf_size, "fast jit compilation failed");
+        return false;
+    }
+#endif
+
 #if WASM_ENABLE_MEMORY_TRACING != 0
     wasm_runtime_dump_module_mem_consumption(module);
 #endif
@@ -2356,6 +2372,11 @@ wasm_loader_load(uint8 *buf, uint32 size, char *error_buf,
         return NULL;
     }
 
+#if WASM_ENABLE_FAST_JIT != 0
+    module->load_addr = (uint8 *)buf;
+    module->load_size = size;
+#endif
+
     if (!load(buf, size, module, error_buf, error_buf_size)) {
         goto fail;
     }
@@ -2453,6 +2474,16 @@ wasm_loader_unload(WASMModule *module)
     }
 #endif
 
+#if WASM_ENABLE_FAST_JIT != 0
+    if (module->fast_jit_func_ptrs) {
+        for (i = 0; i < module->function_count; i++) {
+            if (module->fast_jit_func_ptrs[i])
+                jit_code_cache_free(module->fast_jit_func_ptrs[i]);
+        }
+        wasm_runtime_free(module->fast_jit_func_ptrs);
+    }
+#endif
+
     wasm_runtime_free(module);
 }
 
@@ -5778,7 +5809,8 @@ re_scan:
                 operand_offset = local_offset;
                 PUSH_OFFSET_TYPE(local_type);
 #else
-#if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0)
+#if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0) \
+    && (WASM_ENABLE_FAST_JIT == 0)
                 if (local_offset < 0x80) {
                     *p_org++ = EXT_OP_GET_LOCAL_FAST;
                     if (is_32bit_type(local_type))
@@ -5838,7 +5870,8 @@ re_scan:
                     POP_OFFSET_TYPE(local_type);
                 }
 #else
-#if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0)
+#if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0) \
+    && (WASM_ENABLE_FAST_JIT == 0)
                 if (local_offset < 0x80) {
                     *p_org++ = EXT_OP_SET_LOCAL_FAST;
                     if (is_32bit_type(local_type))
@@ -5894,7 +5927,8 @@ re_scan:
                              *(loader_ctx->frame_offset
                                - wasm_value_type_cell_num(local_type)));
 #else
-#if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0)
+#if (WASM_ENABLE_WAMR_COMPILER == 0) && (WASM_ENABLE_JIT == 0) \
+    && (WASM_ENABLE_FAST_JIT == 0)
                 if (local_offset < 0x80) {
                     *p_org++ = EXT_OP_TEE_LOCAL_FAST;
                     if (is_32bit_type(local_type))

+ 82 - 12
core/iwasm/interpreter/wasm_runtime.c

@@ -56,7 +56,7 @@ wasm_load(uint8 *buf, uint32 size, char *error_buf, uint32 error_buf_size)
 
 WASMModule *
 wasm_load_from_sections(WASMSection *section_list, char *error_buf,
-                        uint32_t error_buf_size)
+                        uint32 error_buf_size)
 {
     return wasm_loader_load_from_sections(section_list, error_buf,
                                           error_buf_size);
@@ -349,6 +349,24 @@ memory_instantiate(WASMModuleInstance *module_inst, uint32 num_bytes_per_page,
         }
     }
 
+#if WASM_ENABLE_FAST_JIT != 0
+    if (memory_data_size > 0) {
+#if UINTPTR_MAX == UINT64_MAX
+        memory->mem_bound_check_1byte = memory_data_size - 1;
+        memory->mem_bound_check_2bytes = memory_data_size - 2;
+        memory->mem_bound_check_4bytes = memory_data_size - 4;
+        memory->mem_bound_check_8bytes = memory_data_size - 8;
+        memory->mem_bound_check_16bytes = memory_data_size - 16;
+#else
+        memory->mem_bound_check_1byte = (uint32)memory_data_size - 1;
+        memory->mem_bound_check_2bytes = (uint32)memory_data_size - 2;
+        memory->mem_bound_check_4bytes = (uint32)memory_data_size - 4;
+        memory->mem_bound_check_8bytes = (uint32)memory_data_size - 8;
+        memory->mem_bound_check_16bytes = (uint32)memory_data_size - 16;
+#endif
+    }
+#endif
+
 #if WASM_ENABLE_SHARED_MEMORY != 0
     if (0 != os_mutex_init(&memory->mem_lock)) {
         set_error_buf(error_buf, error_buf_size, "init mutex failed");
@@ -693,6 +711,10 @@ functions_instantiate(const WASMModule *module, WASMModuleInstance *module_inst,
         function++;
     }
 
+#if WASM_ENABLE_FAST_JIT != 0
+    module_inst->fast_jit_func_ptrs = module->fast_jit_func_ptrs;
+#endif
+
     bh_assert((uint32)(function - functions) == function_count);
     (void)module_inst;
     return functions;
@@ -2470,6 +2492,22 @@ wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count)
     memory->memory_data_end =
         memory->memory_data + memory->num_bytes_per_page * total_page_count;
 
+#if WASM_ENABLE_FAST_JIT != 0
+#if UINTPTR_MAX == UINT64_MAX
+    memory->mem_bound_check_1byte = total_size - 1;
+    memory->mem_bound_check_2bytes = total_size - 2;
+    memory->mem_bound_check_4bytes = total_size - 4;
+    memory->mem_bound_check_8bytes = total_size - 8;
+    memory->mem_bound_check_16bytes = total_size - 16;
+#else
+    memory->mem_bound_check_1byte = (uint32)total_size - 1;
+    memory->mem_bound_check_2bytes = (uint32)total_size - 2;
+    memory->mem_bound_check_4bytes = (uint32)total_size - 4;
+    memory->mem_bound_check_8bytes = (uint32)total_size - 8;
+    memory->mem_bound_check_16bytes = (uint32)total_size - 16;
+#endif
+#endif
+
     return ret;
 }
 #else
@@ -2564,14 +2602,14 @@ wasm_enlarge_table(WASMModuleInstance *module_inst, uint32 table_idx,
 }
 #endif /* WASM_ENABLE_REF_TYPES != 0 */
 
-bool
-wasm_call_indirect(WASMExecEnv *exec_env, uint32_t tbl_idx,
-                   uint32_t element_indices, uint32_t argc, uint32_t argv[])
+static bool
+call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 elem_idx,
+              uint32 argc, uint32 argv[], bool check_type_idx, uint32 type_idx)
 {
     WASMModuleInstance *module_inst = NULL;
     WASMTableInstance *table_inst = NULL;
-    uint32_t function_indices = 0;
-    WASMFunctionInstance *function_inst = NULL;
+    uint32 func_idx = 0;
+    WASMFunctionInstance *func_inst = NULL;
 
     module_inst = (WASMModuleInstance *)exec_env->module_inst;
     bh_assert(module_inst);
@@ -2582,7 +2620,7 @@ wasm_call_indirect(WASMExecEnv *exec_env, uint32_t tbl_idx,
         goto got_exception;
     }
 
-    if (element_indices >= table_inst->cur_size) {
+    if (elem_idx >= table_inst->cur_size) {
         wasm_set_exception(module_inst, "undefined element");
         goto got_exception;
     }
@@ -2591,8 +2629,8 @@ wasm_call_indirect(WASMExecEnv *exec_env, uint32_t tbl_idx,
      * please be aware that table_inst->base_addr may point
      * to another module's table
      **/
-    function_indices = ((uint32_t *)table_inst->base_addr)[element_indices];
-    if (function_indices == NULL_REF) {
+    func_idx = ((uint32 *)table_inst->base_addr)[elem_idx];
+    if (func_idx == NULL_REF) {
         wasm_set_exception(module_inst, "uninitialized element");
         goto got_exception;
     }
@@ -2600,14 +2638,29 @@ wasm_call_indirect(WASMExecEnv *exec_env, uint32_t tbl_idx,
     /**
      * we insist to call functions owned by the module itself
      **/
-    if (function_indices >= module_inst->function_count) {
+    if (func_idx >= module_inst->function_count) {
         wasm_set_exception(module_inst, "unknown function");
         goto got_exception;
     }
 
-    function_inst = module_inst->functions + function_indices;
+    func_inst = module_inst->functions + func_idx;
+
+    if (check_type_idx) {
+        WASMType *cur_type = module_inst->module->types[type_idx];
+        WASMType *cur_func_type;
 
-    interp_call_wasm(module_inst, exec_env, function_inst, argc, argv);
+        if (func_inst->is_import_func)
+            cur_func_type = func_inst->u.func_import->func_type;
+        else
+            cur_func_type = func_inst->u.func->func_type;
+
+        if (!wasm_type_equal(cur_type, cur_func_type)) {
+            wasm_set_exception(module_inst, "indirect call type mismatch");
+            goto got_exception;
+        }
+    }
+
+    interp_call_wasm(module_inst, exec_env, func_inst, argc, argv);
 
     (void)clear_wasi_proc_exit_exception(module_inst);
     return !wasm_get_exception(module_inst) ? true : false;
@@ -2616,6 +2669,23 @@ got_exception:
     return false;
 }
 
+bool
+wasm_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 elem_idx,
+                   uint32 argc, uint32 argv[])
+{
+    return call_indirect(exec_env, tbl_idx, elem_idx, argc, argv, false, 0);
+}
+
+#if WASM_ENABLE_FAST_JIT != 0
+bool
+jit_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 elem_idx,
+                  uint32 type_idx, uint32 argc, uint32 argv[])
+{
+    return call_indirect(exec_env, tbl_idx, elem_idx, argc, argv, true,
+                         type_idx);
+}
+#endif
+
 #if WASM_ENABLE_THREAD_MGR != 0
 bool
 wasm_set_aux_stack(WASMExecEnv *exec_env, uint32 start_offset, uint32 size)
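
The `mem_bound_check_*bytes` fields populated above precompute `memory_data_size - N`, so JITed code can bound-check an N-byte access with a single unsigned comparison. A C rendering of the intended check, assuming the types from wasm_runtime.h; the helper name is made up:

```c
/* Illustration only: an N-byte access at linear address `addr` is in
 * bounds iff addr <= memory_data_size - N, which is exactly the
 * precomputed mem_bound_check_Nbytes value. */
static inline bool
is_i32_load_in_bounds(const WASMMemoryInstance *memory, uint64 addr)
{
    return addr <= memory->mem_bound_check_4bytes;
}
```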

+ 32 - 6
core/iwasm/interpreter/wasm_runtime.h

@@ -52,6 +52,22 @@ struct WASMMemoryInstance {
        Note: when memory is re-allocated, the heap data and memory data
              must be copied to new memory also. */
     uint8 *memory_data;
+
+#if WASM_ENABLE_FAST_JIT != 0
+#if UINTPTR_MAX == UINT64_MAX
+    uint64 mem_bound_check_1byte;
+    uint64 mem_bound_check_2bytes;
+    uint64 mem_bound_check_4bytes;
+    uint64 mem_bound_check_8bytes;
+    uint64 mem_bound_check_16bytes;
+#else
+    uint32 mem_bound_check_1byte;
+    uint32 mem_bound_check_2bytes;
+    uint32 mem_bound_check_4bytes;
+    uint32 mem_bound_check_8bytes;
+    uint32 mem_bound_check_16bytes;
+#endif
+#endif
 };
 
 struct WASMTableInstance {
@@ -167,6 +183,10 @@ struct WASMModuleInstance {
 
     /* Array of function pointers to import functions */
     void **import_func_ptrs;
+#if WASM_ENABLE_FAST_JIT != 0
+    /* point to JITed functions */
+    void **fast_jit_func_ptrs;
+#endif
 
     WASMMemoryInstance **memories;
     WASMTableInstance **tables;
@@ -280,7 +300,7 @@ wasm_load(uint8 *buf, uint32 size, char *error_buf, uint32 error_buf_size);
 
 WASMModule *
 wasm_load_from_sections(WASMSection *section_list, char *error_buf,
-                        uint32_t error_buf_size);
+                        uint32 error_buf_size);
 
 void
 wasm_unload(WASMModule *module);
@@ -366,16 +386,22 @@ wasm_get_app_addr_range(WASMModuleInstance *module_inst, uint32 app_offset,
                         uint32 *p_app_start_offset, uint32 *p_app_end_offset);
 
 bool
-wasm_get_native_addr_range(WASMModuleInstance *module_inst, uint8_t *native_ptr,
-                           uint8_t **p_native_start_addr,
-                           uint8_t **p_native_end_addr);
+wasm_get_native_addr_range(WASMModuleInstance *module_inst, uint8 *native_ptr,
+                           uint8 **p_native_start_addr,
+                           uint8 **p_native_end_addr);
 
 bool
 wasm_enlarge_memory(WASMModuleInstance *module, uint32 inc_page_count);
 
 bool
-wasm_call_indirect(WASMExecEnv *exec_env, uint32_t tbl_idx,
-                   uint32_t element_indices, uint32_t argc, uint32_t argv[]);
+wasm_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 elem_idx,
+                   uint32 argc, uint32 argv[]);
+
+#if WASM_ENABLE_FAST_JIT != 0
+bool
+jit_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 elem_idx,
+                  uint32 type_idx, uint32 argc, uint32 argv[]);
+#endif
 
 #if WASM_ENABLE_THREAD_MGR != 0
 bool

+ 39 - 20
doc/build_wamr.md

@@ -29,19 +29,20 @@ The script `runtime_lib.cmake` defines a number of variables for configuring the
 cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM
 ```
 
-#### **Configure interpreter**
+#### **Configure interpreters**
 
 - **WAMR_BUILD_INTERP**=1/0: enable or disable WASM interpreter
 
 - **WAMR_BUILD_FAST_INTERP**=1/0: build fast (default) or classic WASM interpreter.
 
-  NOTE: the fast interpreter runs ~2X faster than classic interpreter, but consumes about 2X memory to hold the WASM bytecode code.
+  NOTE: the fast interpreter runs ~2X faster than the classic interpreter, but consumes about 2X the memory to hold the pre-compiled code.
 
-#### **Configure AOT and JIT**
+#### **Configure AOT and JITs**
 
-- **WAMR_BUILD_AOT**=1/0, default to enable if not set
-- **WAMR_BUILD_JIT**=1/0, default to disable if not set
+- **WAMR_BUILD_AOT**=1/0, enable AOT or not, default to enable if not set
+- **WAMR_BUILD_JIT**=1/0, enable LLVM JIT or not, default to disable if not set
 - **WAMR_BUILD_LAZY_JIT**=1/0, whether to use Lazy JIT mode or not when *WAMR_BUILD_JIT* is set, default to enable if not set
+- **WAMR_BUILD_FAST_JIT**=1/0, enable Fast JIT or not, default to disable if not set
 
 #### **Configure LIBC**
 
@@ -206,8 +207,7 @@ sudo dnf install glibc-devel.i686
 After installing dependencies, build the source code:
 ``` Bash
 cd product-mini/platforms/linux/
-mkdir build
-cd build
+mkdir build && cd build
 cmake ..
 make
 # iwasm is generated under current directory
@@ -216,30 +216,49 @@ make
 By default in Linux, the `fast interpreter`, `AOT` and `Libc WASI` are enabled, and JIT is disabled.
 And the build target is set to X86_64 or X86_32 depending on the platform's bitwidth.
 
-To run a wasm file with interpreter mode:
+There are 6 running modes supported in total: fast interpreter, classic interpreter, AOT, LLVM Lazy JIT, LLVM MC JIT and Fast JIT.
+
+(1) To run a wasm file in `fast interpreter` mode, build iwasm with the default settings and then:
 ```Bash
 iwasm <wasm file>
 ```
-To run an AOT file, firstly please refer to [Build wamrc AOT compiler](../README.md#build-wamrc-aot-compiler) to build wamrc, and then:
+Or
+```Bash
+mkdir build && cd build
+cmake .. -DWAMR_BUILD_INTERP=1
+make
+```
+
+(2) To disable `fast interpreter` and enable `classic interpreter` instead:
+``` Bash
+mkdir build && cd build
+cmake .. -DWAMR_BUILD_FAST_INTERP=0
+make
+```
+
+(3) To run an AOT file, please first refer to [Build wamrc AOT compiler](../README.md#build-wamrc-aot-compiler) to build wamrc, and then:
 ```Bash
 wamrc -o <AOT file> <WASM file>
 iwasm <AOT file>
 ```
 
-To enable the `JIT` mode, firstly we should build LLVM:
-
+(4) To enable the `LLVM Lazy JIT` mode, first build the LLVM library:
 ``` Bash
 cd product-mini/platforms/linux/
 ./build_llvm.sh     (The llvm source code is cloned under <wamr_root_dir>/core/deps/llvm and auto built)
 ```
 
-Then pass argument `-DWAMR_BUILD_JIT=1` to cmake to enable WASM JIT:
-
+Then pass argument `-DWAMR_BUILD_JIT=1` to cmake to enable LLVM Lazy JIT:
 ``` Bash
-mkdir build
-cd build
+mkdir build && cd build
 cmake .. -DWAMR_BUILD_JIT=1
-# or "cmake .. -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=0" to disable LLVM Lazy JIT and enable LLVM MC JIT
+make
+```
+
+(5) Or disable `LLVM Lazy JIT` and enable `LLVM MC JIT` instead:
+```Bash
+mkdir build && cd build
+cmake .. -DWAMR_BUILD_JIT=1 -DWAMR_BUILD_LAZY_JIT=0
 make
 ```
 
@@ -248,13 +267,13 @@ by creating threads to compile the WASM functions parallely, and for the main th
 module will not be compiled until they are first called and have not yet been compiled by the compilation threads.
 To disable it and enable LLVM MC JIT instead, please pass argument `-DWAMR_BUILD_LAZY_JIT=0` to cmake.
 
-To disable `fast interpreter` and enable `classic interpreter` instead:
+(6) To enable the `Fast JIT` mode:
 ``` Bash
-mkdir build
-cd build
-cmake .. -DWAMR_BUILD_FAST_INTERP=0
+mkdir build && cd build
+cmake .. -DWAMR_BUILD_FAST_JIT=1
 make
 ```
+The Fast JIT is a lightweight JIT engine with quick startup, small footprint and good portability, and achieves roughly 50% of the performance of AOT.
 
 Linux SGX (Intel Software Guard Extension)
 -------------------------

+ 8 - 2
doc/linux_sgx.md

@@ -11,12 +11,18 @@ After installing the dependencies, build the source code:
 ``` Bash
 source <SGX_SDK dir>/environment
 cd product-mini/platforms/linux-sgx/
-mkdir build
-cd build
+mkdir build && cd build
 cmake ..
 make
 ```
 
+By default the `fast interpreter` and `AOT` are enabled. To enable `Fast JIT` as well, run:
+```Bash
+mkdir build && cd build
+cmake .. -DWAMR_BUILD_FAST_JIT=1
+make
+```
+
 This builds two libraries required by SGX application:
  - libvmlib.a for Enclave part
  - libvmlib_untrusted.a for App part

+ 5 - 0
product-mini/platforms/darwin/CMakeLists.txt

@@ -51,6 +51,11 @@ if (NOT DEFINED WAMR_BUILD_JIT)
   set (WAMR_BUILD_JIT 0)
 endif ()
 
+if (NOT DEFINED WAMR_BUILD_FAST_JIT)
+  # Disable Fast JIT by default
+  set (WAMR_BUILD_FAST_JIT 0)
+endif ()
+
 if (NOT DEFINED WAMR_BUILD_LIBC_BUILTIN)
   # Enable libc builtin support by default
   set (WAMR_BUILD_LIBC_BUILTIN 1)

+ 5 - 0
product-mini/platforms/linux-sgx/CMakeLists.txt

@@ -44,6 +44,11 @@ if (NOT DEFINED WAMR_BUILD_JIT)
   set (WAMR_BUILD_JIT 0)
 endif ()
 
+if (NOT DEFINED WAMR_BUILD_FAST_JIT)
+  # Disable Fast JIT by default
+  set (WAMR_BUILD_FAST_JIT 0)
+endif ()
+
 if (NOT DEFINED WAMR_BUILD_LIBC_BUILTIN)
   # Enable libc builtin support by default
   set (WAMR_BUILD_LIBC_BUILTIN 1)

+ 14 - 0
product-mini/platforms/linux/CMakeLists.txt

@@ -52,6 +52,11 @@ if (NOT DEFINED WAMR_BUILD_JIT)
   set (WAMR_BUILD_JIT 0)
 endif ()
 
+if (NOT DEFINED WAMR_BUILD_FAST_JIT)
+  # Disable Fast JIT by default
+  set (WAMR_BUILD_FAST_JIT 0)
+endif ()
+
 if (NOT DEFINED WAMR_BUILD_LIBC_BUILTIN)
   # Enable libc builtin support by default
   set (WAMR_BUILD_LIBC_BUILTIN 1)
@@ -117,14 +122,20 @@ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections -pie -f
 set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat -Wformat-security -Wshadow")
 # set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wconversion -Wsign-conversion")
 
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wformat -Wformat-security")
+
 if (WAMR_BUILD_TARGET MATCHES "X86_.*" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
   if (NOT (CMAKE_C_COMPILER MATCHES ".*clang.*" OR CMAKE_C_COMPILER_ID MATCHES ".*Clang"))
     set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mindirect-branch-register")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register")
     # UNDEFINED BEHAVIOR, refer to https://en.cppreference.com/w/cpp/language/ub
     if(CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT WAMR_BUILD_JIT EQUAL 1)
       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined \
                                           -fno-sanitize=bounds,bounds-strict,alignment \
                                           -fno-sanitize-recover")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined \
+                                              -fno-sanitize=bounds,bounds-strict,alignment \
+                                              -fno-sanitize-recover")
     endif()
   else ()
     # UNDEFINED BEHAVIOR, refer to https://en.cppreference.com/w/cpp/language/ub
@@ -132,6 +143,9 @@ if (WAMR_BUILD_TARGET MATCHES "X86_.*" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined \
                                           -fno-sanitize=bounds,alignment \
                                           -fno-sanitize-recover")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined \
+                                              -fno-sanitize=bounds,alignment \
+                                              -fno-sanitize-recover")
     endif()
   endif ()
 endif ()

+ 18 - 0
product-mini/platforms/posix/main.c

@@ -34,6 +34,10 @@ print_help()
 #endif
     printf("  --stack-size=n         Set maximum stack size in bytes, default is 16 KB\n");
     printf("  --heap-size=n          Set maximum heap size in bytes, default is 16 KB\n");
+#if WASM_ENABLE_FAST_JIT != 0
+    printf("  --jit-codecache-size=n Set fast jit maximum code cache size in bytes,\n");
+    printf("                         default is %u KB\n", FAST_JIT_DEFAULT_CODE_CACHE_SIZE / 1024);
+#endif
     printf("  --repl                 Start a very simple REPL (read-eval-print-loop) mode\n"
            "                         that runs commands in the form of \"FUNC ARG...\"\n");
 #if WASM_ENABLE_LIBC_WASI != 0
@@ -295,6 +299,9 @@ main(int argc, char *argv[])
     uint8 *wasm_file_buf = NULL;
     uint32 wasm_file_size;
     uint32 stack_size = 16 * 1024, heap_size = 16 * 1024;
+#if WASM_ENABLE_FAST_JIT != 0
+    uint32 jit_code_cache_size = FAST_JIT_DEFAULT_CODE_CACHE_SIZE;
+#endif
     wasm_module_t wasm_module = NULL;
     wasm_module_inst_t wasm_module_inst = NULL;
     RuntimeInitArgs init_args;
@@ -354,6 +361,13 @@ main(int argc, char *argv[])
                 return print_help();
             heap_size = atoi(argv[0] + 12);
         }
+#if WASM_ENABLE_FAST_JIT != 0
+        else if (!strncmp(argv[0], "--jit-codecache-size=", 21)) {
+            if (argv[0][21] == '\0')
+                return print_help();
+            jit_code_cache_size = atoi(argv[0] + 21);
+        }
+#endif
 #if WASM_ENABLE_LIBC_WASI != 0
         else if (!strncmp(argv[0], "--dir=", 6)) {
             if (argv[0][6] == '\0')
@@ -471,6 +485,10 @@ main(int argc, char *argv[])
     init_args.mem_alloc_option.allocator.free_func = free;
 #endif
 
+#if WASM_ENABLE_FAST_JIT != 0
+    init_args.fast_jit_code_cache_size = jit_code_cache_size;
+#endif
+
 #if WASM_ENABLE_DEBUG_INTERP != 0
     init_args.platform_port = 0;
     init_args.instance_port = instance_port;

+ 23 - 5
tests/wamr-test-suites/test_wamr.sh

@@ -8,7 +8,7 @@
 function DEBUG() {
   [[ -n $(env | grep "\<DEBUG\>") ]] && $@
 }
-DEBUG set -xEevuo pipefail
+DEBUG set -xv pipefail
 
 function help()
 {
@@ -16,7 +16,7 @@ function help()
     echo "-c clean previous test results, not start test"
     echo "-s {suite_name} test only one suite (spec)"
     echo "-m set compile target of iwasm(x86_64\x86_32\armv7_vfp\thumbv7_vfp\riscv64_lp64d\riscv64_lp64)"
-    echo "-t set compile type of iwasm(classic-interp\fast-interp\jit\aot)"
+    echo "-t set compile type of iwasm(classic-interp\fast-interp\jit\aot\fast-jit)"
     echo "-M enable multi module feature"
     echo "-p enable multi thread feature"
     echo "-S enable SIMD feature"
@@ -29,7 +29,7 @@ function help()
 OPT_PARSED=""
 WABT_BINARY_RELEASE="NO"
 #default type
-TYPE=("classic-interp" "fast-interp" "jit" "aot")
+TYPE=("classic-interp" "fast-interp" "jit" "aot" "fast-jit")
 #default target
 TARGET="X86_64"
 ENABLE_MULTI_MODULE=0
@@ -80,7 +80,7 @@ do
         t)
         echo "set compile type of wamr " ${OPTARG}
         if [[ ${OPTARG} != "classic-interp" && ${OPTARG} != "fast-interp" \
-            && ${OPTARG} != "jit" && ${OPTARG} != "aot" ]]; then
+            && ${OPTARG} != "jit" && ${OPTARG} != "aot" && ${OPTARG} != "fast-jit" ]]; then
             echo "*----- please varify a type of compile when using -t! -----*"
             help
             exit 1
@@ -186,11 +186,19 @@ readonly AOT_COMPILE_FLAGS="\
     -DWAMR_BUILD_SPEC_TEST=1 \
     -DCOLLECT_CODE_COVERAGE=${COLLECT_CODE_COVERAGE}"
 
+readonly FAST_JIT_COMPILE_FLAGS="\
+    -DWAMR_BUILD_TARGET=${TARGET} \
+    -DWAMR_BUILD_INTERP=1 -DWAMR_BUILD_FAST_INTERP=0 \
+    -DWAMR_BUILD_JIT=0 -DWAMR_BUILD_AOT=0 \
+    -DWAMR_BUILD_FAST_JIT=1 \
+    -DWAMR_BUILD_SPEC_TEST=1"
+
 readonly COMPILE_FLAGS=(
         "${CLASSIC_INTERP_COMPILE_FLAGS}"
         "${FAST_INTERP_COMPILE_FLAGS}"
         "${JIT_COMPILE_FLAGS}"
         "${AOT_COMPILE_FLAGS}"
+        "${FAST_JIT_COMPILE_FLAGS}"
     )
 
 # TODO: with libiwasm.so only
@@ -609,6 +617,16 @@ function trigger()
                 collect_coverage aot
             ;;
 
+            "fast-jit")
+                echo "work in fast-jit mode"
+                # fast jit
+                BUILD_FLAGS="$FAST_JIT_COMPILE_FLAGS $EXTRA_COMPILE_FLAGS"
+                build_iwasm_with_cfg $BUILD_FLAGS
+                for suite in "${TEST_CASE_ARR[@]}"; do
+                    $suite"_test" fast-jit
+                done
+            ;;
+
             *)
             echo "unexpected mode, do nothing"
             ;;
@@ -627,6 +645,6 @@ else
 fi
 
 echo -e "\033[32mTest finish. Reports are under ${REPORT_DIR} \033[0m"
-DEBUG set +xEevuo pipefail
+DEBUG set +xv pipefail
 echo "TEST SUCCESSFUL"
 exit 0