Parcourir la source

Implement AOT support for RISCV (#649)

Enable RISCV AOT support, the supported ABIs are LP64 and LP64D for riscv64, ILP32 and ILP32D for riscv32.
For wamrc:
    use --target=riscv64/riscv32 to specify the target arch of output AOT file,
    use --target-abi=lp64d/lp64/ilp32d/ilp32 to specify the target ABI,
    if --target-abi isn't specified, by default lp64d is used for riscv64, and ilp32d is used for riscv32.

Signed-off-by: Huang Qi <huangqi3@xiaomi.com>
Co-authored-by: wenyongh <wenyong.huang@intel.com>
Huang Qi il y a 4 ans
Parent
commit
e4023c8e02

+ 8 - 3
build-scripts/config_common.cmake

@@ -51,7 +51,8 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug")
 endif ()
 
 if (CMAKE_SIZEOF_VOID_P EQUAL 8)
-  if (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64" OR WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "RISCV64.*")
+  if (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64"
+      OR WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "RISCV64.*")
     if (NOT WAMR_BUILD_PLATFORM STREQUAL "windows")
       # Add -fPIC flag if build as 64-bit
       set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
@@ -186,8 +187,12 @@ else ()
   add_definitions (-DWASM_DISABLE_HW_BOUND_CHECK=0)
 endif ()
 if (WAMR_BUILD_SIMD EQUAL 1)
-  add_definitions (-DWASM_ENABLE_SIMD=1)
-  message ("     SIMD enabled")
+  if (NOT WAMR_BUILD_TARGET MATCHES "RISCV64.*")
+    add_definitions (-DWASM_ENABLE_SIMD=1)
+    message ("     SIMD enabled")
+  else ()
+    message ("     SIMD disabled due to not supported on target RISCV64")
+  endif ()
 endif ()
 if (WAMR_BUILD_MEMORY_PROFILING EQUAL 1)
   add_definitions (-DWASM_ENABLE_MEMORY_PROFILING=1)

+ 0 - 5
build-scripts/runtime_lib.cmake

@@ -52,11 +52,6 @@ if (WAMR_BUILD_INTERP EQUAL 1 OR WAMR_BUILD_JIT EQUAL 1)
     include (${IWASM_DIR}/interpreter/iwasm_interp.cmake)
 endif ()
 
-if (WAMR_BUILD_TARGET MATCHES "RISCV.*" AND WAMR_BUILD_AOT EQUAL 1)
-    set (WAMR_BUILD_AOT 0)
-    message ("-- WAMR AOT disabled as it isn't supported by riscv currently")
-endif ()
-
 if (WAMR_BUILD_AOT EQUAL 1)
     include (${IWASM_DIR}/aot/iwasm_aot.cmake)
     if (WAMR_BUILD_JIT EQUAL 1)

+ 2 - 1
core/app-mgr/app-manager/module_wasm_app.c

@@ -1395,7 +1395,8 @@ wasm_app_module_on_install_request_byte_arrive(uint8 ch,
                 if (section->section_type == AOT_SECTION_TYPE_TEXT) {
                     int map_prot =
                         MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC;
-#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
+    || defined(BUILD_TARGET_RISCV64_LP64D) || defined(BUILD_TARGET_RISCV64_LP64)
                     /* aot code and data in x86_64 must be in range 0 to 2G due to
                        relocation for R_X86_64_32/32S/PC32 */
                     int map_flags = MMAP_MAP_32BIT;

+ 9 - 2
core/iwasm/aot/aot_loader.c

@@ -182,6 +182,7 @@ GET_U64_FROM_ADDR(uint32 *addr)
 #define E_MACHINE_MIPS_X   51       /* Stanford MIPS-X */
 #define E_MACHINE_X86_64   62       /* AMD x86-64 architecture */
 #define E_MACHINE_XTENSA   94       /* Tensilica Xtensa Architecture */
+#define E_MACHINE_RISCV    243      /* RISC-V 32/64 */
 #define E_MACHINE_WIN_X86_64 0x8664 /* Windowx x86-64 architecture */
 
 /* Legal values for e_version */
@@ -257,6 +258,9 @@ get_aot_file_target(AOTTargetInfo *target_info,
         case E_MACHINE_XTENSA:
             machine_type = "xtensa";
             break;
+        case E_MACHINE_RISCV:
+            machine_type = "riscv";
+            break;
         default:
             set_error_buf_v(error_buf, error_buf_size,
                             "unknown machine type %d",
@@ -1030,7 +1034,8 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
     /* Create each data section */
     for (i = 0; i < module->data_section_count; i++) {
         int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
-#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
+    || defined(BUILD_TARGET_RISCV64_LP64D) || defined(BUILD_TARGET_RISCV64_LP64)
         /* aot code and data in x86_64 must be in range 0 to 2G due to
            relocation for R_X86_64_32/32S/PC32 */
         int map_flags = MMAP_MAP_32BIT;
@@ -1501,6 +1506,7 @@ do_text_relocation(AOTModule *module,
             symbol_addr = module->code;
         }
         else if (!strcmp(symbol, ".data")
+                 || !strcmp(symbol, ".sdata")
                  || !strcmp(symbol, ".rdata")
                  || !strcmp(symbol, ".rodata")
                  /* ".rodata.cst4/8/16/.." */
@@ -2235,7 +2241,8 @@ create_sections(const uint8 *buf, uint32 size,
                 if (section_size > 0) {
                     int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE
                                    | MMAP_PROT_EXEC;
-#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
+    || defined(BUILD_TARGET_RISCV64_LP64D) || defined(BUILD_TARGET_RISCV64_LP64)
                     /* aot code and data in x86_64 must be in range 0 to 2G due to
                        relocation for R_X86_64_32/32S/PC32 */
                     int map_flags = MMAP_MAP_32BIT;

+ 6 - 6
core/iwasm/aot/aot_runtime.c

@@ -2776,12 +2776,12 @@ aot_table_copy(AOTModuleInstance *module_inst,
     /* if src_offset >= dst_offset, copy from front to back */
     /* if src_offset < dst_offset, copy from back to front */
     /* merge all together */
-    bh_memcpy_s((uint8 *)(dst_tbl_inst) + offsetof(AOTTableInstance, data)
-                  + dst_offset * sizeof(uint32),
-                (dst_tbl_inst->cur_size - dst_offset) * sizeof(uint32),
-                (uint8 *)(src_tbl_inst) + offsetof(AOTTableInstance, data)
-                  + src_offset * sizeof(uint32),
-                length * sizeof(uint32));
+    bh_memmove_s((uint8 *)(dst_tbl_inst) + offsetof(AOTTableInstance, data)
+                   + dst_offset * sizeof(uint32),
+                 (dst_tbl_inst->cur_size - dst_offset) * sizeof(uint32),
+                 (uint8 *)(src_tbl_inst) + offsetof(AOTTableInstance, data)
+                   + src_offset * sizeof(uint32),
+                 length * sizeof(uint32));
 }
 
 void

+ 337 - 0
core/iwasm/aot/arch/aot_reloc_riscv.c

@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2021 XiaoMi Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "aot_reloc.h"
+
+/* Relocation types handled by apply_relocation(); the numeric values are
+   the relocation type numbers of the RISC-V ELF psABI */
+#define R_RISCV_32       1
+#define R_RISCV_64       2
+#define R_RISCV_CALL     18
+#define R_RISCV_CALL_PLT 19
+#define R_RISCV_HI20     26
+#define R_RISCV_LO12_I   27
+#define R_RISCV_LO12_S   28
+
+/* Value of the low 7 bits (major opcode) of a store instruction; compared
+   against the second instruction of a call pair in apply_relocation() to
+   choose between the S-type and the I-type immediate layout */
+#define RV_OPCODE_SW 0x23
+
+/* 64-bit integer arithmetic helpers. They are only declared here so that
+   their addresses can be registered in target_sym_map below
+   (presumably they come from the compiler runtime library - TODO confirm) */
+void __divdi3();
+void __moddi3();
+void __muldi3();
+void __udivdi3();
+void __umoddi3();
+
+/* Native symbols exported to AOT code on this target: the entries of
+   REG_COMMON_SYMBOLS followed by the helpers declared above.
+   init_plt_table() emits one PLT item per entry, in this order. */
+static SymbolMap target_sym_map[] = {
+    REG_COMMON_SYMBOLS
+    REG_SYM(__divdi3),
+    REG_SYM(__moddi3),
+    REG_SYM(__muldi3),
+    REG_SYM(__udivdi3),
+    REG_SYM(__umoddi3),
+};
+
+/* Copy a message into the caller supplied error buffer.
+   Nothing is written when error_buf is NULL. */
+static void
+set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
+{
+    if (!error_buf)
+        return;
+
+    snprintf(error_buf, error_buf_size, "%s", string);
+}
+
+/* Write the name of the target this relocation code is built for
+   ("riscv") into target_buf, truncated to target_buf_size bytes. */
+void
+get_current_target(char *target_buf, uint32 target_buf_size)
+{
+    static const char target_name[] = "riscv";
+
+    snprintf(target_buf, target_buf_size, "%s", target_name);
+}
+
+/* Size in bytes of one PLT item. PLT items are only generated when
+   __riscv_xlen is 64; for any other value the size is 0. */
+uint32
+get_plt_item_size()
+{
+#if __riscv_xlen == 64
+    /* auipc (4) + ld (4) + jr (2) + nop (2) + 64-bit address (8),
+       matching the sequence written by init_plt_table() */
+    return 20;
+#else
+    return 0;
+#endif
+}
+
+/* Return the target symbol table and store its entry count in *sym_num. */
+SymbolMap *
+get_target_symbol_map(uint32 *sym_num)
+{
+    uint32 entry_count = sizeof(target_sym_map) / sizeof(target_sym_map[0]);
+
+    *sym_num = entry_count;
+    return target_sym_map;
+}
+
+/**
+ * Get a 32-bit val from given address.
+ *
+ * The value is read as two consecutive 16-bit halfwords, low halfword
+ * first, so addr only has to be 2-byte aligned.
+ *
+ * @param addr address of the low halfword
+ *
+ * @return the combined 32-bit value
+ */
+static uint32
+rv_get_val(uint16 *addr)
+{
+    /* Widen to uint32 before shifting: a uint16 operand is promoted to
+       signed int, and shifting a halfword >= 0x8000 left by 16 bits would
+       overflow that int */
+    uint32 low = addr[0];
+    uint32 high = addr[1];
+
+    return low | (high << 16);
+}
+
+/* Set a val to given address: the 32-bit val is written as two 16-bit
+   halfwords, low halfword first */
+static void
+rv_set_val(uint16 *addr, uint32 val)
+{
+    *addr = (val & 0xffff);
+    *(addr + 1) = (val >> 16);
+
+    /* the patched location may hold instructions: fence.i makes the
+       preceding stores visible to instruction fetches of this hart */
+    asm volatile("fence.i");
+}
+
+/* Add val to the 32-bit value stored at addr (read, add, write back) */
+static void
+rv_add_val(uint16 *addr, uint32 val)
+{
+    rv_set_val(addr, rv_get_val(addr) + val);
+}
+
+/**
+ * Split a signed 32-bit immediate into an upper and a lower part so that
+ * imm == imm_hi * 4096 + imm_lo, with imm_lo kept in [-2048, 2047]
+ *
+ * @param imm given integer, signed 32bit
+ * @param imm_hi receives the upper part, signed 20bit
+ * @param imm_lo receives the lower part, signed 12bit
+ *
+ */
+static void
+rv_calc_imm(int32 imm, int32 *imm_hi, int32 *imm_lo)
+{
+    int32 upper = imm / 4096;
+    int32 rem = imm % 4096;
+
+    /* the lower part is a signed 12-bit field, so move one unit of 4096
+       into or out of the upper part when the remainder does not fit */
+    if (rem > 2047)
+        upper += 1;
+    else if (rem < -2048)
+        upper -= 1;
+
+    *imm_lo = imm - (upper * 4096);
+    *imm_hi = upper;
+}
+
+/* Total size in bytes of the PLT area: one item per target_sym_map entry */
+uint32
+get_plt_table_size()
+{
+    uint32 item_count = sizeof(target_sym_map) / sizeof(SymbolMap);
+
+    return get_plt_item_size() * item_count;
+}
+
+/* Fill the PLT area starting at plt with one item per target_sym_map
+   entry. Each item loads the symbol address stored inside the item and
+   jumps to it. Nothing is written unless __riscv_xlen is 64. */
+void
+init_plt_table(uint8 *plt)
+{
+#if __riscv_xlen == 64
+    uint32 i, num = sizeof(target_sym_map) / sizeof(SymbolMap);
+    uint8 *p;
+
+    for (i = 0; i < num; i++) {
+        p = plt;
+        /* auipc t1, 0 : t1 = address of this PLT item */
+        *(uint16*)p = 0x0317;
+        p += 2;
+        *(uint16*)p = 0x0000;
+        p += 2;
+        /* ld t1, 12(t1) : the two halfwords form 0x00C33303, whose
+           immediate field is 12, the offset of the address word below */
+        *(uint16*)p = 0x3303;
+        p += 2;
+        *(uint16*)p = 0x00C3;
+        p += 2;
+        /* jr t1 (16-bit compressed encoding) */
+        *(uint16*)p = 0x8302;
+        p += 2;
+        /* nop (16-bit compressed encoding) */
+        *(uint16*)p = 0x0001;
+        p += 2;
+        /* 64-bit address of the native symbol, at offset 12 of the item */
+        bh_memcpy_s(p, 8, &target_sym_map[i].symbol_addr, 8);
+        p += 8;
+        plt += get_plt_item_size();
+    }
+#endif
+}
+
+/* Pairs a relocation type number with its printable name */
+typedef struct RelocTypeStrMap {
+    uint32 reloc_type;
+    char *reloc_str;
+} RelocTypeStrMap;
+
+/* Build a RelocTypeStrMap initializer whose name is the spelling of the
+   macro argument, e.g. RELOC_TYPE_MAP(R_RISCV_32) names it "R_RISCV_32" */
+#define RELOC_TYPE_MAP(reloc_type) { reloc_type, #reloc_type }
+
+/* Printable names of every relocation type handled by apply_relocation(),
+   used by reloc_type_to_str() when building error messages */
+static RelocTypeStrMap reloc_type_str_maps[] = {
+    RELOC_TYPE_MAP(R_RISCV_32),
+    RELOC_TYPE_MAP(R_RISCV_64),
+    RELOC_TYPE_MAP(R_RISCV_CALL),
+    RELOC_TYPE_MAP(R_RISCV_CALL_PLT),
+    RELOC_TYPE_MAP(R_RISCV_HI20),
+    RELOC_TYPE_MAP(R_RISCV_LO12_I),
+    RELOC_TYPE_MAP(R_RISCV_LO12_S),
+};
+
+/* Look up the printable name of a relocation type; types missing from
+   reloc_type_str_maps yield "Unknown_Reloc_Type" */
+static const char *
+reloc_type_to_str(uint32 reloc_type)
+{
+    const uint32 count = sizeof(reloc_type_str_maps) / sizeof(RelocTypeStrMap);
+    uint32 idx = 0;
+
+    while (idx < count) {
+        if (reloc_type == reloc_type_str_maps[idx].reloc_type)
+            return reloc_type_str_maps[idx].reloc_str;
+        idx++;
+    }
+
+    return "Unknown_Reloc_Type";
+}
+
+/* Verify that reloc_data_size bytes starting at reloc_offset lie inside a
+   section of target_section_size bytes. On failure an error message is
+   stored in error_buf and false is returned. */
+static bool
+check_reloc_offset(uint32 target_section_size,
+                   uint64 reloc_offset, uint32 reloc_data_size,
+                   char *error_buf, uint32 error_buf_size)
+{
+    uint64 section_size = (uint64)target_section_size;
+
+    if (reloc_offset >= section_size
+        || reloc_offset + reloc_data_size > section_size) {
+        set_error_buf(error_buf, error_buf_size,
+                      "AOT module load failed: invalid relocation offset.");
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Apply one relocation record to the target section.
+ *
+ * @param module the AOT module; its code and code_size are used to locate
+ *        the PLT items stored at the end of the code area
+ * @param target_section_addr start address of the section being patched
+ * @param target_section_size size of that section in bytes
+ * @param reloc_offset offset of the patched location inside the section
+ * @param reloc_addend addend of the relocation record
+ * @param reloc_type one of the R_RISCV_* types defined in this file
+ * @param symbol_addr resolved address of the referenced symbol
+ * @param symbol_index index of the symbol's PLT item, negative when the
+ *        symbol has no PLT item
+ * @param error_buf receives an error message on failure, may be NULL
+ * @param error_buf_size size of error_buf in bytes
+ *
+ * @return true on success; false when the offset lies outside the section,
+ *         the value does not fit the relocation, or the type is unknown
+ */
+bool
+apply_relocation(AOTModule *module,
+                 uint8 *target_section_addr, uint32 target_section_size,
+                 uint64 reloc_offset, uint64 reloc_addend, uint32 reloc_type,
+                 void *symbol_addr, int32 symbol_index,
+                 char *error_buf, uint32 error_buf_size)
+{
+    int32 val, imm_hi, imm_lo, insn;
+    uint8 *addr = target_section_addr + reloc_offset;
+    char buf[128];
+
+    switch (reloc_type) {
+        case R_RISCV_32:
+        {
+            uint32 val_32 = (uint32)(uintptr_t)((uint8 *)symbol_addr + reloc_addend);
+
+            CHECK_RELOC_OFFSET(sizeof(uint32));
+            /* the absolute address must survive truncation to 32 bits */
+            if (val_32 != (uintptr_t)((uint8 *)symbol_addr + reloc_addend)) {
+                goto fail_addr_out_of_range;
+            }
+
+            rv_set_val((uint16 *)addr, val_32);
+            break;
+        }
+        case R_RISCV_64:
+        {
+            uint64 val_64 = (uint64)(uintptr_t)((uint8 *)symbol_addr + reloc_addend);
+            CHECK_RELOC_OFFSET(sizeof(uint64));
+            bh_memcpy_s(addr, 8, &val_64, 8);
+            break;
+        }
+        case R_RISCV_CALL:
+        case R_RISCV_CALL_PLT:
+        {
+            val = (int32)(intptr_t)((uint8 *)symbol_addr - addr);
+
+            /* this relocation patches two consecutive 32-bit instructions
+               (at addr and at addr + 4), so both must be inside the
+               section */
+            CHECK_RELOC_OFFSET(2 * sizeof(uint32));
+            if (val != (intptr_t)((uint8 *)symbol_addr - addr)) {
+                if (symbol_index >= 0) {
+                    /* Call runtime function by plt code */
+                    symbol_addr = (uint8*)module->code + module->code_size
+                                  - get_plt_table_size()
+                                  + get_plt_item_size() * symbol_index;
+                    val = (int32)(intptr_t)((uint8*)symbol_addr - addr);
+                }
+            }
+
+            /* still out of 32-bit range, with or without the PLT item */
+            if (val != (intptr_t)((uint8 *)symbol_addr - addr)) {
+                goto fail_addr_out_of_range;
+            }
+
+            rv_calc_imm(val, &imm_hi, &imm_lo);
+
+            /* first instruction: add the upper part to bits 31..12 */
+            rv_add_val((uint16 *)addr, (imm_hi << 12));
+            if ((rv_get_val((uint16 *)(addr + 4)) & 0x7f) == RV_OPCODE_SW) {
+                /* Adjust imm for SW : S-type */
+                val =
+                  (((int32)imm_lo >> 5) << 25) + (((int32)imm_lo & 0x1f) << 7);
+
+                rv_add_val((uint16 *)(addr + 4), val);
+            }
+            else {
+                /* Adjust imm for MV(ADDI)/JALR : I-type */
+                rv_add_val((uint16 *)(addr + 4), ((int32)imm_lo << 20));
+            }
+            break;
+        }
+
+        case R_RISCV_HI20:
+        {
+            /* cast before adding: arithmetic on void * is not standard C */
+            val = (int32)(intptr_t)((uint8 *)symbol_addr + reloc_addend);
+
+            CHECK_RELOC_OFFSET(sizeof(uint32));
+            if (val != (intptr_t)((uint8 *)symbol_addr + reloc_addend)) {
+                goto fail_addr_out_of_range;
+            }
+
+            insn = rv_get_val((uint16 *)addr);
+            rv_calc_imm(val, &imm_hi, &imm_lo);
+            /* keep bits 11..0, replace bits 31..12 with the upper part */
+            insn = (insn & 0x00000fff) | (imm_hi << 12);
+            rv_set_val((uint16 *)addr, insn);
+            break;
+        }
+
+        case R_RISCV_LO12_I:
+        {
+            val = (int32)(intptr_t)((uint8 *)symbol_addr + reloc_addend);
+
+            CHECK_RELOC_OFFSET(sizeof(uint32));
+            if (val != (intptr_t)((uint8 *)symbol_addr + reloc_addend)) {
+                goto fail_addr_out_of_range;
+            }
+
+            insn = rv_get_val((uint16 *)addr);
+            rv_calc_imm(val, &imm_hi, &imm_lo);
+            /* keep bits 19..0, replace bits 31..20 with the lower part */
+            insn = (insn & 0x000fffff) | (imm_lo << 20);
+            rv_set_val((uint16 *)addr, insn);
+            break;
+        }
+
+        case R_RISCV_LO12_S:
+        {
+            val = (int32)(intptr_t)((uint8 *)symbol_addr + reloc_addend);
+
+            CHECK_RELOC_OFFSET(sizeof(uint32));
+            if (val != (intptr_t)((uint8 *)symbol_addr + reloc_addend)) {
+                goto fail_addr_out_of_range;
+            }
+
+            rv_calc_imm(val, &imm_hi, &imm_lo);
+            /* S-type: the lower part is split over bits 31..25 and 11..7 */
+            val =
+              (((int32)imm_lo >> 5) << 25) + (((int32)imm_lo & 0x1f) << 7);
+            rv_add_val((uint16 *)addr, val);
+            break;
+        }
+
+        default:
+            if (error_buf != NULL)
+                snprintf(error_buf, error_buf_size,
+                         "Load relocation section failed: "
+                         "invalid relocation type %d.",
+                         reloc_type);
+            return false;
+    }
+
+    return true;
+
+fail_addr_out_of_range:
+    snprintf(buf, sizeof(buf),
+             "AOT module load failed: "
+             "relocation truncated to fit %s failed.",
+             reloc_type_to_str(reloc_type));
+    set_error_buf(error_buf, error_buf_size, buf);
+    return false;
+}

+ 2 - 0
core/iwasm/aot/iwasm_aot.cmake

@@ -23,6 +23,8 @@ elseif (WAMR_BUILD_TARGET STREQUAL "MIPS")
   set (arch_source ${IWASM_AOT_DIR}/arch/aot_reloc_mips.c)
 elseif (WAMR_BUILD_TARGET STREQUAL "XTENSA")
   set (arch_source ${IWASM_AOT_DIR}/arch/aot_reloc_xtensa.c)
+elseif (WAMR_BUILD_TARGET MATCHES "RISCV.*")
+  # Every RISC-V target name (RISCV64*, RISCV32*) uses the same relocation
+  # source. The pattern is "RISCV.*" because "RISCV*" would mean "RISC"
+  # followed by zero or more "V".
+  set (arch_source ${IWASM_AOT_DIR}/arch/aot_reloc_riscv.c)
 else ()
   message (FATAL_ERROR "Build target isn't set")
 endif ()

+ 148 - 0
core/iwasm/common/arch/invokeNative_riscv.S

@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2019 Intel Corporation.  All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+/*
+ * The float ABI macros used below are from the RISC-V C API:
+ * https://github.com/riscv/riscv-c-api-doc/blob/master/riscv-c-api.md
+ *
+ */
+
+/* Select the floating-point load/store mnemonics and the size in bytes of
+   one floating-point argument slot according to the float ABI in use */
+#if defined(__riscv_float_abi_soft)
+#define RV_FPREG_SIZE    0
+#elif defined(__riscv_float_abi_single)
+#define RV_OP_LOADFPREG  flw
+#define RV_OP_STOREFPREG fsw
+#define RV_FPREG_SIZE    4
+#elif defined(__riscv_float_abi_double)
+#define RV_OP_LOADFPREG  fld
+#define RV_OP_STOREFPREG fsd
+#define RV_FPREG_SIZE    8
+#endif
+
+#if __riscv_xlen == 32
+#define RV_OP_LOADREG  lw
+#define RV_OP_STOREREG sw
+#define RV_REG_SIZE    4
+#define RV_REG_SHIFT   2
+#define RV_FP_OFFSET   (8 * RV_REG_SIZE)
+#define RV_INT_OFFSET  0
+#else
+#define RV_OP_LOADREG  ld
+#define RV_OP_STOREREG sd
+#define RV_REG_SIZE    8
+#define RV_REG_SHIFT   3
+#define RV_FP_OFFSET   0
+#define RV_INT_OFFSET  (8 * RV_FPREG_SIZE)
+#endif
+
+        .text
+        .align  2
+#ifndef BH_PLATFORM_DARWIN
+        .globl invokeNative
+        .type  invokeNative, function
+invokeNative:
+#else
+        .globl _invokeNative
+_invokeNative:
+#endif /* end of BH_PLATFORM_DARWIN */
+
+/*
+ * Arguments passed in:
+ *
+ * a0 function ptr
+ * a1 argv
+ * a2 nstacks
+ */
+
+/*
+ * sp (stack pointer)
+ *    |- sd/sw to store 64/32-bit values from register to memory
+ *    |- ld/lw to load from stack to register
+ * fp/s0 (frame pointer)
+ * a0-a7 (8 integer arguments)
+ *    |- sd/sw to store
+ *    |- ld/lw to load
+ * fa0-fa7 (8 float arguments)
+ *    |- fsd/fsw to store
+ *    |- fld/flw to load
+ * t0-t6 (temporary registers)
+ *    |- caller saved
+ */
+
+        /* reserve space on stack to save return address and frame pointer */
+        /* NOTE(review): sp is lowered by 2 * RV_REG_SIZE, which is only
+           8 bytes when __riscv_xlen is 32, and it is re-aligned to 16 bytes
+           further down only when nstacks is not zero - confirm whether the
+           callee needs a 16-byte aligned sp in the nstacks == 0 case */
+        addi             sp, sp, - 2 * RV_REG_SIZE
+        RV_OP_STOREREG   fp, 0 * RV_REG_SIZE(sp)    /* save frame pointer */
+        RV_OP_STOREREG   ra, 1 * RV_REG_SIZE(sp)    /* save return address */
+
+        mv               fp, sp                     /* set frame pointer to bottom of fixed frame */
+
+        /* save function ptr, argv & nstacks */
+        mv               t0, a0                     /* t0 = function ptr */
+        mv               t1, a1                     /* t1 = argv array address */
+        mv               t2, a2                     /* t2 = nstack */
+
+#ifndef __riscv_float_abi_soft
+        /* fill in fa0-7 float-registers*/
+        RV_OP_LOADFPREG  fa0, RV_FP_OFFSET + 0 * RV_FPREG_SIZE(t1) /* fa0 */
+        RV_OP_LOADFPREG  fa1, RV_FP_OFFSET + 1 * RV_FPREG_SIZE(t1) /* fa1 */
+        RV_OP_LOADFPREG  fa2, RV_FP_OFFSET + 2 * RV_FPREG_SIZE(t1) /* fa2 */
+        RV_OP_LOADFPREG  fa3, RV_FP_OFFSET + 3 * RV_FPREG_SIZE(t1) /* fa3 */
+        RV_OP_LOADFPREG  fa4, RV_FP_OFFSET + 4 * RV_FPREG_SIZE(t1) /* fa4 */
+        RV_OP_LOADFPREG  fa5, RV_FP_OFFSET + 5 * RV_FPREG_SIZE(t1) /* fa5 */
+        RV_OP_LOADFPREG  fa6, RV_FP_OFFSET + 6 * RV_FPREG_SIZE(t1) /* fa6 */
+        RV_OP_LOADFPREG  fa7, RV_FP_OFFSET + 7 * RV_FPREG_SIZE(t1) /* fa7 */
+#endif
+
+        /* fill in a0-7 integer-registers*/
+        RV_OP_LOADREG    a0, RV_INT_OFFSET + 0 * RV_REG_SIZE(t1)    /* a0 */
+        RV_OP_LOADREG    a1, RV_INT_OFFSET + 1 * RV_REG_SIZE(t1)    /* a1 */
+        RV_OP_LOADREG    a2, RV_INT_OFFSET + 2 * RV_REG_SIZE(t1)    /* a2 */
+        RV_OP_LOADREG    a3, RV_INT_OFFSET + 3 * RV_REG_SIZE(t1)    /* a3 */
+        RV_OP_LOADREG    a4, RV_INT_OFFSET + 4 * RV_REG_SIZE(t1)    /* a4 */
+        RV_OP_LOADREG    a5, RV_INT_OFFSET + 5 * RV_REG_SIZE(t1)    /* a5 */
+        RV_OP_LOADREG    a6, RV_INT_OFFSET + 6 * RV_REG_SIZE(t1)    /* a6 */
+        RV_OP_LOADREG    a7, RV_INT_OFFSET + 7 * RV_REG_SIZE(t1)    /* a7 */
+
+        /* t1 points to stack args */
+
+        /* RV_FPREG_SIZE is zero when __riscv_float_abi_soft defined */
+        addi             t1, t1, RV_REG_SIZE * 8 + RV_FPREG_SIZE * 8
+
+        /* directly call the function if no args in stack,
+           x0 always holds 0 */
+        beq              t2, x0, call_func
+
+        /* reserve enough stack space for function arguments */
+        sll              t3, t2, RV_REG_SHIFT       /* t3 = nstacks << RV_REG_SHIFT = nstacks * RV_REG_SIZE */
+        sub              sp, sp, t3
+
+        /* make 16-byte aligned: t3 = ~15, then clear the low 4 bits of sp */
+        li               t3, 15
+        not              t3, t3
+        and              sp, sp, t3
+
+        /* save sp in t4 register */
+        mv               t4, sp
+
+        /* copy the remaining arguments from argv (t1) to the stack area
+           reserved above (t4), one register-sized slot per iteration */
+loop_stack_args:
+        beq              t2, x0, call_func
+        RV_OP_LOADREG    t5, 0(t1)                  /* load stack argument, t5 = argv[i] */
+        RV_OP_STOREREG   t5, 0(t4)                  /* store t5 to reserved stack, sp[j] = t5 */
+        addi             t1, t1, RV_REG_SIZE        /* move to next stack argument */
+        addi             t4, t4, RV_REG_SIZE        /* move to next stack pointer */
+        addi             t2, t2, -1                 /* decrease t2 every loop, nstacks = nstacks -1 */
+        j loop_stack_args
+
+call_func:
+        jalr             t0                         /* call the native function whose address is in t0 */
+
+        /* restore registers pushed in stack or saved in another register */
+return:
+        mv               sp, fp                     /* restore sp saved in fp before function call */
+        RV_OP_LOADREG    fp, 0 * RV_REG_SIZE(sp)    /* load previous frame pointer to fp register */
+        RV_OP_LOADREG    ra, 1 * RV_REG_SIZE(sp)    /* load previous return address to ra register */
+        addi             sp, sp, 2 * RV_REG_SIZE    /* pop frame, restore sp */
+        jr               ra

+ 0 - 95
core/iwasm/common/arch/invokeNative_riscv32_ilp32.s

@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2019 Intel Corporation.  All rights reserved.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
-        .text
-        .align  2
-#ifndef BH_PLATFORM_DARWIN
-        .globl invokeNative
-        .type  invokeNative, function
-invokeNative:
-#else
-        .globl _invokeNative
-_invokeNative:
-#endif /* end of BH_PLATFORM_DARWIN */
-
-
-/*
- * Arguments passed in:
- *
- * a0 function ptr
- * a1 argv
- * a2 nstacks
- */
-
-/*
- * sp (stack pointer)
- *    |- sw to store 32-bit values from register to memory
- *    |- lw to load from stack to register
- * fp/s0 (frame pointer)
- * a0-a7 (8 integer arguments)
- *    |- sw to store
- *    |- lw to load
- * t0-t6 (temporaries regisgers)
- *    |- caller saved
- */
-
-        /* reserve space on stack to save return address and frame pointer */
-        addi      sp, sp, -8
-        sw        fp, 0(sp)            /* save frame pointer */
-        sw        ra, 4(sp)            /* save return address */
-
-        mv        fp, sp               /* set frame pointer to bottom of fixed frame */
-
-        /* save function ptr, argv & nstacks */
-        mv        t0, a0               /* t0 = function ptr */
-        mv        t1, a1               /* t1 = argv array address */
-        mv        t2, a2               /* t2 = nstack */
-
-        /* fill in a0-7 integer-registers */
-        lw        a0, 0(t1)            /* a0 = argv[0] */
-        lw        a1, 4(t1)            /* a1 = argv[1] */
-        lw        a2, 8(t1)            /* a2 = argv[2] */
-        lw        a3, 12(t1)           /* a3 = argv[3] */
-        lw        a4, 16(t1)           /* a4 = argv[4] */
-        lw        a5, 20(t1)           /* a5 = argv[5] */
-        lw        a6, 24(t1)           /* a6 = argv[6] */
-        lw        a7, 28(t1)           /* a7 = argv[7] */
-
-        addi      t1, t1, 32           /* t1 points to stack args */
-
-        /* directly call the function if no args in stack,
-           x0 always holds 0 */
-        beq       t2, x0, call_func
-
-        /* reserve enough stack space for function arguments */
-        sll       t3, t2, 2             /* shift left 2 bits. t3 = n_stacks * 4 */
-        sub       sp, sp, t3
-
-        /* make 16-byte aligned */
-        and       sp, sp, ~15
-
-        /* save sp in t4 register */
-        mv        t4, sp
-
-        /* copy left arguments from caller stack to own frame stack */
-loop_stack_args:
-        beq       t2, x0, call_func
-        lw        t5, 0(t1)             /* load stack argument, t5 = argv[i] */
-        sw        t5, 0(t4)             /* store t5 to reseved stack, sp[j] = t5 */
-        addi      t1, t1, 4             /* move to next stack argument */
-        addi      t4, t4, 4             /* move to next stack pointer */
-        addi      t2, t2, -1            /* decrease t2 every loop, nstacks = nstacks -1 */
-        j loop_stack_args
-
-call_func:
-        jalr      t0
-
-        /* restore registers pushed in stack or saved in another register */
-return:
-        mv        sp, fp                /* restore sp saved in fp before function call */
-        lw        fp, 0(sp)             /* load previous frame poniter to fp register */
-        lw        ra, 4(sp)             /* load previous return address to ra register */
-        addi      sp, sp, 8             /* pop frame, restore sp */
-        jr        ra
-

+ 0 - 104
core/iwasm/common/arch/invokeNative_riscv32_ilp32d.s

@@ -1,104 +0,0 @@
-/*
- * Copyright (C) 2019 Intel Corporation.  All rights reserved.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
-        .text
-        .align  2
-#ifndef BH_PLATFORM_DARWIN
-        .globl invokeNative
-        .type  invokeNative, function
-invokeNative:
-#else
-        .globl _invokeNative
-_invokeNative:
-#endif /* end of BH_PLATFORM_DARWIN */
-
-
-/*
- * Arguments passed in:
- *
- * a0 function ptr
- * a1 argv
- * a2 nstacks
- */
-
-/*
- * sp (stack pointer)
- *    |- sw to store 32-bit values from register to memory
- *    |- lw to load from stack to register
- * fp/s0 (frame pointer)
- * a0-a7 (8 integer arguments)
- *    |- sw to store
- *    |- lw to load
- * t0-t6 (temporaries regisgers)
- *    |- caller saved
- */
-
-        /* reserve space on stack to save return address and frame pointer */
-        addi      sp, sp, -8
-        sw        fp, 0(sp)            /* save frame pointer */
-        sw        ra, 4(sp)            /* save return address */
-
-        mv        fp, sp               /* set frame pointer to bottom of fixed frame */
-
-        /* save function ptr, argv & nstacks */
-        mv        t0, a0               /* t0 = function ptr */
-        mv        t1, a1               /* t1 = argv array address */
-        mv        t2, a2               /* t2 = nstack */
-
-        /* fill in a0-7 integer-registers */
-        lw        a0, 0(t1)            /* a0 = argv[0] */
-        lw        a1, 4(t1)            /* a1 = argv[1] */
-        lw        a2, 8(t1)            /* a2 = argv[2] */
-        lw        a3, 12(t1)           /* a3 = argv[3] */
-        lw        a4, 16(t1)           /* a4 = argv[4] */
-        lw        a5, 20(t1)           /* a5 = argv[5] */
-        lw        a6, 24(t1)           /* a6 = argv[6] */
-        lw        a7, 28(t1)           /* a7 = argv[7] */
-
-        /* fill in fa0-7 float-registers*/
-        fld       fa0, 32(t1)          /* fa0 = argv[8] */
-        fld       fa1, 40(t1)          /* fa1 = argv[9] */
-        fld       fa2, 48(t1)          /* fa2 = argv[10] */
-        fld       fa3, 56(t1)          /* fa3 = argv[11] */
-        fld       fa4, 64(t1)          /* fa4 = argv[12] */
-        fld       fa5, 72(t1)          /* fa5 = argv[13] */
-        fld       fa6, 80(t1)          /* fa6 = argv[14] */
-        fld       fa7, 88(t1)          /* fa7 = argv[15] */
-
-        addi      t1, t1, 96           /* t1 points to stack args */
-
-        /* directly call the function if no args in stack,
-           x0 always holds 0 */
-        beq       t2, x0, call_func
-
-        /* reserve enough stack space for function arguments */
-        sll       t3, t2, 2             /* shift left 2 bits. t3 = n_stacks * 4 */
-        sub       sp, sp, t3
-
-        /* make 16-byte aligned */
-        and       sp, sp, ~15
-
-        /* save sp in t4 register */
-        mv        t4, sp
-
-        /* copy left arguments from caller stack to own frame stack */
-loop_stack_args:
-        beq       t2, x0, call_func
-        lw        t5, 0(t1)             /* load stack argument, t5 = argv[i] */
-        sw        t5, 0(t4)             /* store t5 to reseved stack, sp[j] = t5 */
-        addi      t1, t1, 4             /* move to next stack argument */
-        addi      t4, t4, 4             /* move to next stack pointer */
-        addi      t2, t2, -1            /* decrease t2 every loop, nstacks = nstacks -1 */
-        j loop_stack_args
-
-call_func:
-        jalr      t0
-
-        /* restore registers pushed in stack or saved in another register */
-return:
-        mv        sp, fp                /* restore sp saved in fp before function call */
-        lw        fp, 0(sp)             /* load previous frame poniter to fp register */
-        lw        ra, 4(sp)             /* load previous return address to ra register */
-        addi      sp, sp, 8             /* pop frame, restore sp */
-        jr        ra

+ 0 - 95
core/iwasm/common/arch/invokeNative_riscv64_lp64.s

@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2019 Intel Corporation.  All rights reserved.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
-        .text
-        .align  2
-#ifndef BH_PLATFORM_DARWIN
-        .globl invokeNative
-        .type  invokeNative, function
-invokeNative:
-#else
-        .globl _invokeNative
-_invokeNative:
-#endif /* end of BH_PLATFORM_DARWIN */
-
-
-/*
- * Arguments passed in:
- *
- * a0 function ptr
- * a1 argv
- * a2 nstacks
- */
-
-/*
- * sp (stack pointer)
- *    |- sd to store 64-bit values from register to memory
- *    |- ld to load from stack to register
- * fp/s0 (frame pointer)
- * a0-a7 (8 integer arguments)
- *    |- sd to store
- *    |- ld to load
- * t0-t6 (temporaries regisgers)
- *    |- caller saved
- */
-
-        /* reserve space on stack to save return address and frame pointer */
-        addi      sp, sp, -16
-        sd        fp, 0(sp)            /* save frame pointer */
-        sd        ra, 8(sp)            /* save return address */
-
-        mv        fp, sp               /* set frame pointer to bottom of fixed frame */
-
-        /* save function ptr, argv & nstacks */
-        mv        t0, a0               /* t0 = function ptr */
-        mv        t1, a1               /* t1 = argv array address */
-        mv        t2, a2               /* t2 = nstack */
-
-        /* fill in a0-7 integer-registers*/
-        ld        a0, 0(t1)            /* a0 = argv[0] */
-        ld        a1, 8(t1)            /* a1 = argv[1] */
-        ld        a2, 16(t1)           /* a2 = argv[2] */
-        ld        a3, 24(t1)           /* a3 = argv[3] */
-        ld        a4, 32(t1)           /* a4 = argv[4] */
-        ld        a5, 40(t1)           /* a5 = argv[5] */
-        ld        a6, 48(t1)           /* a6 = argv[6] */
-        ld        a7, 56(t1)           /* a7 = argv[7] */
-
-        addi      t1, t1, 64           /* t1 points to stack args */
-
-        /* directly call the function if no args in stack,
-           x0 always holds 0 */
-        beq       t2, x0, call_func
-
-        /* reserve enough stack space for function arguments */
-        sll       t3, t2, 3             /* shift left 3 bits. t3 = n_stacks * 8 */
-        sub       sp, sp, t3
-
-        /* make 16-byte aligned */
-        and       sp, sp, ~(15LL)
-
-        /* save sp in t4 register */
-        mv        t4, sp
-
-        /* copy left arguments from caller stack to own frame stack */
-loop_stack_args:
-        beq       t2, x0, call_func
-        ld        t5, 0(t1)             /* load stack argument, t5 = argv[i] */
-        sd        t5, 0(t4)             /* store t5 to reseved stack, sp[j] = t5 */
-        addi      t1, t1, 8             /* move to next stack argument */
-        addi      t4, t4, 8             /* move to next stack pointer */
-        addi      t2, t2, -1            /* decrease t2 every loop, nstacks = nstacks -1 */
-        j loop_stack_args
-
-call_func:
-        jalr      t0
-
-        /* restore registers pushed in stack or saved in another register */
-return:
-        mv        sp, fp                /* restore sp saved in fp before function call */
-        ld        fp, 0(sp)             /* load previous frame poniter to fp register */
-        ld        ra, 8(sp)             /* load previous return address to ra register */
-        addi      sp, sp, 16            /* pop frame, restore sp */
-        jr        ra
-

+ 0 - 108
core/iwasm/common/arch/invokeNative_riscv64_lp64d.s

@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 2019 Intel Corporation.  All rights reserved.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- */
-        .text
-        .align  2
-#ifndef BH_PLATFORM_DARWIN
-        .globl invokeNative
-        .type  invokeNative, function
-invokeNative:
-#else
-        .globl _invokeNative
-_invokeNative:
-#endif /* end of BH_PLATFORM_DARWIN */
-
-/*
- * Arguments passed in:
- *
- * a0 function ptr
- * a1 argv
- * a2 nstacks
- */
-
-/*
- * sp (stack pointer)
- *    |- sd to store 64-bit values from register to memory
- *    |- ld to load from stack to register
- * fp/s0 (frame pointer)
- * a0-a7 (8 integer arguments)
- *    |- sd to store
- *    |- ld to load
- * fa0-a7 (8 float arguments)
- *    |- fsd to store
- *    |- fld to load
- * t0-t6 (temporaries regisgers)
- *    |- caller saved
- */
-
-        /* reserve space on stack to save return address and frame pointer */
-        addi      sp, sp, -16
-        sd        fp, 0(sp)             /* save frame pointer */
-        sd        ra, 8(sp)             /* save return address */
-
-        mv        fp, sp                /* set frame pointer to bottom of fixed frame */
-
-        /* save function ptr, argv & nstacks */
-        mv        t0, a0                /* t0 = function ptr */
-        mv        t1, a1                /* t1 = argv array address */
-        mv        t2, a2                /* t2 = nstack */
-
-        /* fill in fa0-7 float-registers*/
-        fld       fa0, 0(t1)            /* fa0 = argv[0] */
-        fld       fa1, 8(t1)            /* fa1 = argv[1] */
-        fld       fa2, 16(t1)           /* fa2 = argv[2] */
-        fld       fa3, 24(t1)           /* fa3 = argv[3] */
-        fld       fa4, 32(t1)           /* fa4 = argv[4] */
-        fld       fa5, 40(t1)           /* fa5 = argv[5] */
-        fld       fa6, 48(t1)           /* fa6 = argv[6] */
-        fld       fa7, 56(t1)           /* fa7 = argv[7] */
-
-        /* fill in a0-7 integer-registers*/
-        ld        a0, 64(t1)            /* a0 = argv[8] */
-        ld        a1, 72(t1)            /* a1 = argv[9] */
-        ld        a2, 80(t1)            /* a2 = argv[10] */
-        ld        a3, 88(t1)            /* a3 = argv[11] */
-        ld        a4, 96(t1)            /* a4 = argv[12] */
-        ld        a5, 104(t1)           /* a5 = argv[13] */
-        ld        a6, 112(t1)           /* a6 = argv[14] */
-        ld        a7, 120(t1)           /* a7 = argv[15] */
-
-        addi      t1, t1, 128           /* t1 points to stack args */
-
-        /* directly call the function if no args in stack,
-           x0 always holds 0 */
-        beq       t2, x0, call_func
-
-        /* reserve enough stack space for function arguments */
-        sll       t3, t2, 3             /* shift left 3 bits. t3 = n_stacks * 8 */
-        sub       sp, sp, t3
-
-        /* make 16-byte aligned */
-        and       sp, sp, ~(15LL)
-
-        /* save sp in t4 register */
-        mv        t4, sp
-
-        /* copy left arguments from caller stack to own frame stack */
-loop_stack_args:
-        beq       t2, x0, call_func
-        ld        t5, 0(t1)             /* load stack argument, t5 = argv[i] */
-        sd        t5, 0(t4)             /* store t5 to reseved stack, sp[j] = t5 */
-        addi      t1, t1, 8             /* move to next stack argument */
-        addi      t4, t4, 8             /* move to next stack pointer */
-        addi      t2, t2, -1            /* decrease t2 every loop, nstacks = nstacks -1 */
-        j loop_stack_args
-
-call_func:
-        jalr      t0
-
-        /* restore registers pushed in stack or saved in another register */
-return:
-        mv        sp, fp                /* restore sp saved in fp before function call */
-        ld        fp, 0(sp)             /* load previous frame poniter to fp register */
-        ld        ra, 8(sp)             /* load previous return address to ra register */
-        addi      sp, sp, 16            /* pop frame, restore sp */
-        jr        ra
-
-

+ 2 - 8
core/iwasm/common/iwasm_common.cmake

@@ -66,14 +66,8 @@ elseif (WAMR_BUILD_TARGET STREQUAL "MIPS")
   set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_mips.s)
 elseif (WAMR_BUILD_TARGET STREQUAL "XTENSA")
   set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_xtensa.s)
-elseif (WAMR_BUILD_TARGET STREQUAL "RISCV64" OR WAMR_BUILD_TARGET STREQUAL "RISCV64_LP64D")
-  set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv64_lp64d.s)
-elseif (WAMR_BUILD_TARGET STREQUAL "RISCV64_LP64")
-  set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv64_lp64.s)
-elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32" OR WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32D")
-  set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv32_ilp32d.s)
-elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32")
-  set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv32_ilp32.s)
+elseif (WAMR_BUILD_TARGET MATCHES "RISCV.*") # every RISCV32/RISCV64 ABI variant
+  set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv.S)
 else ()
   message (FATAL_ERROR "Build target isn't set")
 endif ()

+ 2 - 1
core/iwasm/common/wasm_runtime_common.c

@@ -3061,7 +3061,8 @@ typedef union __declspec(intrin_type) __declspec(align(8)) v128 {
     unsigned __int32 m128i_u32[4];
     unsigned __int64 m128i_u64[2];
 } v128;
-#elif defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
+#elif defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
+      || defined(BUILD_TARGET_RISCV64_LP64D) || defined(BUILD_TARGET_RISCV64_LP64)
 typedef long long v128 __attribute__ ((__vector_size__ (16),
                                        __may_alias__, __aligned__ (1)));
 #elif defined(BUILD_TARGET_AARCH64)

+ 9 - 3
core/iwasm/compilation/aot_emit_aot_file.c

@@ -1752,6 +1752,7 @@ is_data_section(LLVMSectionIteratorRef sec_itr, char *section_name)
     uint32 relocation_count = 0;
 
     return (!strcmp(section_name, ".data")
+            || !strcmp(section_name, ".sdata")
             || !strcmp(section_name, ".rodata")
             /* ".rodata.cst4/8/16/.." */
             || !strncmp(section_name, ".rodata.cst", strlen(".rodata.cst"))
@@ -1970,10 +1971,13 @@ aot_resolve_object_relocation_group(AOTObjectData *obj_data,
         relocation->relocation_type = (uint32)type;
         relocation->symbol_name = (char *)LLVMGetSymbolName(rel_sym);
 
-        /* for ".LCPIxxx" relocation, transform the symbol name to real
-         * section name and set addend to the symbol address */
+        /* for ".LCPIxxx", ".LJTIxxx" and ".LBBxxx" relocation,
+         * transform the symbol name to real section name and set
+         * addend to the offset of the symbol in the real section */
         if (relocation->symbol_name
-            && str_starts_with(relocation->symbol_name, ".LCPI")) {
+            && (str_starts_with(relocation->symbol_name, ".LCPI")
+                || str_starts_with(relocation->symbol_name, ".LJTI")
+                || str_starts_with(relocation->symbol_name, ".LBB"))) {
             /* change relocation->relocation_addend and relocation->symbol_name */
             LLVMSectionIteratorRef contain_section;
             if (!(contain_section
@@ -2012,6 +2016,8 @@ is_relocation_section_name(char *section_name)
             || !strcmp(section_name, ".rela.literal")
             || !strcmp(section_name, ".rela.data")
             || !strcmp(section_name, ".rel.data")
+            || !strcmp(section_name, ".rela.sdata")
+            || !strcmp(section_name, ".rel.sdata")
             || !strcmp(section_name, ".rela.rodata")
             || !strcmp(section_name, ".rel.rodata")
             /* ".rela.rodata.cst4/8/16/.." */

+ 8 - 0
core/iwasm/compilation/aot_emit_numberic.c

@@ -767,6 +767,12 @@ is_target_mips(AOTCompContext *comp_ctx)
     return !strncmp(comp_ctx->target_arch, "mips", 4);
 }
 
+static bool
+is_target_riscv(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "riscv", 5);
+}
+
 static bool
 is_targeting_soft_float(AOTCompContext *comp_ctx, bool is_f32)
 {
@@ -796,6 +802,8 @@ is_targeting_soft_float(AOTCompContext *comp_ctx, bool is_f32)
          * so user must specify '--cpu-features=-fp' to wamrc if the target
          * doesn't have or enable Floating-Point Coprocessor Option on xtensa. */
         ret = (!is_f32 || strstr(feature_string, "-fp")) ? true : false;
+    else if (is_target_riscv(comp_ctx))
+        ret = !strstr(feature_string, "+d") ? true : false;
     else
         ret = true;
 

+ 42 - 3
core/iwasm/compilation/aot_llvm.c

@@ -1087,14 +1087,22 @@ static ArchItem valid_archs[] = {
     { "thumbv8r", true },
     { "thumbv8m.base", true },
     { "thumbv8m.main", true },
-    { "thumbv8.1m.main", true }
+    { "thumbv8.1m.main", true },
+    { "riscv32", true},
+    { "riscv64", true}
 };
 
 static const char *valid_abis[] = {
     "gnu",
     "eabi",
     "gnueabihf",
-    "msvc"
+    "msvc",
+    "ilp32",
+    "ilp32f",
+    "ilp32d",
+    "lp64",
+    "lp64f",
+    "lp64d"
 };
 
 static void
@@ -1184,7 +1192,7 @@ aot_create_comp_context(AOTCompData *comp_data,
     char *cpu = NULL, *features, buf[128];
     char *triple_norm_new = NULL, *cpu_new = NULL;
     char *err = NULL, *fp_round= "round.tonearest", *fp_exce = "fpexcept.strict";
-    char triple_buf[32] = {0};
+    char triple_buf[32] = { 0 }, features_buf[128] = { 0 };
     uint32 opt_level, size_level;
     LLVMCodeModel code_model;
     LLVMTargetDataRef target_data_ref;
@@ -1323,6 +1331,14 @@ aot_create_comp_context(AOTCompData *comp_data,
             goto fail;
         }
 
+        /* Set default abi for riscv target */
+        if (arch && !strncmp(arch, "riscv", 5) && !abi) {
+            if (!strcmp(arch, "riscv64"))
+                abi = "lp64d";
+            else
+                abi = "ilp32d";
+        }
+
         if (arch) {
             /* Construct target triple: <arch>-<vendor>-<sys>-<abi> */
             const char *vendor_sys;
@@ -1394,6 +1410,29 @@ aot_create_comp_context(AOTCompData *comp_data,
             goto fail;
         }
 
+        /* Add module flag and cpu feature for riscv target */
+        if (arch && !strncmp(arch, "riscv", 5)) {
+            LLVMMetadataRef meta_target_abi;
+
+            if (!(meta_target_abi = LLVMMDStringInContext2(comp_ctx->context,
+                                                           abi, strlen(abi)))) {
+                aot_set_last_error("create metadata string failed.");
+                goto fail;
+            }
+            LLVMAddModuleFlag(comp_ctx->module, LLVMModuleFlagBehaviorError,
+                          "target-abi", strlen("target-abi"), meta_target_abi);
+
+            if (!strcmp(abi, "lp64d") || !strcmp(abi, "ilp32d")) {
+                if (features) {
+                    snprintf(features_buf, sizeof(features_buf),
+                             "%s%s", features, ",+d");
+                    features = features_buf;
+                }
+                else
+                    features = "+d";
+            }
+        }
+
         if (!features)
             features = "";
 

+ 1 - 1
core/iwasm/interpreter/wasm_interp_classic.c

@@ -2785,7 +2785,7 @@ label_pop_csp_n:
           /* if s >= d, copy from front to back */
           /* if s < d, copy from back to front */
           /* merge all together */
-          bh_memcpy_s(
+          bh_memmove_s(
             (uint8 *)(dst_tbl_inst) + offsetof(WASMTableInstance, base_addr)
               + d * sizeof(uint32),
             (dst_tbl_inst->cur_size - d) * sizeof(uint32),

+ 1 - 1
core/iwasm/interpreter/wasm_interp_fast.c

@@ -2761,7 +2761,7 @@ recover_br_info:
             /* if s >= d, copy from front to back */
             /* if s < d, copy from back to front */
             /* merge all together */
-            bh_memcpy_s(
+            bh_memmove_s(
               (uint8 *)dst_tbl_inst + offsetof(WASMTableInstance, base_addr)
                 + d * sizeof(uint32),
               (dst_tbl_inst->cur_size - d) * sizeof(uint32),

+ 4 - 2
core/shared/platform/android/platform_internal.h

@@ -61,7 +61,9 @@ typedef pthread_t korp_thread;
 #if WASM_DISABLE_HW_BOUND_CHECK == 0
 #if defined(BUILD_TARGET_X86_64) \
     || defined(BUILD_TARGET_AMD_64) \
-    || defined(BUILD_TARGET_AARCH64)
+    || defined(BUILD_TARGET_AARCH64) \
+    || defined(BUILD_TARGET_RISCV64_LP64D) \
+    || defined(BUILD_TARGET_RISCV64_LP64)
 
 #include <setjmp.h>
 
@@ -86,7 +88,7 @@ bool os_thread_signal_inited();
 void os_signal_unmask();
 
 void os_sigreturn();
-#endif /* end of BUILD_TARGET_X86_64/AMD_64/AARCH64 */
+#endif /* end of BUILD_TARGET_X86_64/AMD_64/AARCH64/RISCV64 */
 #endif /* end of WASM_DISABLE_HW_BOUND_CHECK */
 
 typedef long int __syscall_slong_t;

+ 50 - 1
core/shared/platform/common/posix/posix_memmap.c

@@ -44,8 +44,57 @@ os_mmap(void *hint, size_t size, int prot, int flags)
     if (flags & MMAP_MAP_FIXED)
         map_flags |= MAP_FIXED;
 
+#if defined(BUILD_TARGET_RISCV64_LP64D) || defined(BUILD_TARGET_RISCV64_LP64)
+    /* As AOT relocation in RISCV64 may require that the code/data mapped
+     * is in range 0 to 2GB, we try to map the memory with hint address
+     * (mmap's first argument) to meet the requirement.
+     */
+    if (!hint && !(flags & MMAP_MAP_FIXED) && (flags & MMAP_MAP_32BIT)) {
+        uint8 *stack_addr = (uint8*)&map_prot;
+        uint8 *text_addr = (uint8*)os_mmap;
+        /* hint address begins with 1MB */
+        static uint8 *hint_addr = (uint8 *)(uintptr_t)BH_MB;
+
+        if ((hint_addr - text_addr >= 0
+             && hint_addr - text_addr < 100 * BH_MB)
+            || (text_addr - hint_addr >= 0
+                && text_addr - hint_addr < 100 * BH_MB)) {
+            /* hint address is possibly in text section, skip it */
+            hint_addr += 100 * BH_MB;
+        }
+
+        if ((hint_addr - stack_addr >= 0
+             && hint_addr - stack_addr < 8 * BH_MB)
+            || (stack_addr - hint_addr >= 0
+                && stack_addr - hint_addr < 8 * BH_MB)) {
+            /* hint address is possibly in native stack area, skip it */
+            hint_addr += 8 * BH_MB;
+        }
+
+        /* try 10 times, step with 1MB each time */
+        for (i = 0;
+             i < 10 && hint_addr < (uint8 *)(uintptr_t)(2ULL * BH_GB);
+             i++) {
+            addr = mmap(hint_addr, request_size, map_prot, map_flags, -1, 0);
+            if (addr != MAP_FAILED) {
+                if (addr + request_size > (uint8 *)(uintptr_t)(2ULL * BH_GB)) {
+                    /* unmap and try again if any byte of the mapping lies
+                     * at or above 2GB, the end matters, not only the start */
+                    os_munmap(addr, request_size);
+                }
+                else {
+                    /* reset next hint address */
+                    hint_addr += request_size;
+                    return addr;
+                }
+            }
+            hint_addr += BH_MB;
+        }
+    }
+#endif
+
     /* try 5 times */
-    for (i = 0; i < 5; i ++) {
+    for (i = 0; i < 5; i++) {
         addr = mmap(hint, request_size, map_prot, map_flags, -1, 0);
         if (addr != MAP_FAILED)
             break;

+ 4 - 2
core/shared/platform/darwin/platform_internal.h

@@ -62,7 +62,9 @@ typedef pthread_t korp_thread;
 #if WASM_DISABLE_HW_BOUND_CHECK == 0
 #if defined(BUILD_TARGET_X86_64) \
     || defined(BUILD_TARGET_AMD_64) \
-    || defined(BUILD_TARGET_AARCH64)
+    || defined(BUILD_TARGET_AARCH64) \
+    || defined(BUILD_TARGET_RISCV64_LP64D) \
+    || defined(BUILD_TARGET_RISCV64_LP64)
 
 #include <setjmp.h>
 
@@ -87,7 +89,7 @@ bool os_thread_signal_inited();
 void os_signal_unmask();
 
 void os_sigreturn();
-#endif /* end of BUILD_TARGET_X86_64/AMD_64/AARCH64 */
+#endif /* end of BUILD_TARGET_X86_64/AMD_64/AARCH64/RISCV64 */
 #endif /* end of WASM_DISABLE_HW_BOUND_CHECK */
 
 #ifdef __cplusplus

+ 4 - 2
core/shared/platform/linux/platform_internal.h

@@ -61,7 +61,9 @@ typedef pthread_t korp_thread;
 #if WASM_DISABLE_HW_BOUND_CHECK == 0
 #if defined(BUILD_TARGET_X86_64) \
     || defined(BUILD_TARGET_AMD_64) \
-    || defined(BUILD_TARGET_AARCH64)
+    || defined(BUILD_TARGET_AARCH64) \
+    || defined(BUILD_TARGET_RISCV64_LP64D) \
+    || defined(BUILD_TARGET_RISCV64_LP64)
 
 #include <setjmp.h>
 
@@ -86,7 +88,7 @@ bool os_thread_signal_inited();
 void os_signal_unmask();
 
 void os_sigreturn();
-#endif /* end of BUILD_TARGET_X86_64/AMD_64/AARCH64 */
+#endif /* end of BUILD_TARGET_X86_64/AMD_64/AARCH64/RISCV64 */
 #endif /* end of WASM_DISABLE_HW_BOUND_CHECK */
 
 #ifdef __cplusplus

+ 10 - 10
product-mini/platforms/nuttx/wamr.mk

@@ -72,27 +72,25 @@ ifeq (${CONFIG_ARCH_FPU},y)
   $(error riscv64 lp64f is unsupported)
 else ifeq (${CONFIG_ARCH_DPFPU}, y)
   CFLAGS += -DBUILD_TARGET_RISCV64_LP64D
-  INVOKE_NATIVE += invokeNative_riscv64_lp64d.s
 else
   CFLAGS += -DBUILD_TARGET_RISCV64_LP64
-  INVOKE_NATIVE += invokeNative_riscv64_lp64.s
 endif
+  INVOKE_NATIVE += invokeNative_riscv.S
 
-  AOT_RELOC :=
+  AOT_RELOC := aot_reloc_riscv.c
 
 else ifeq (${WAMR_BUILD_TARGET}, RISCV32)
 
 ifeq (${CONFIG_ARCH_FPU}, y)
   $(error riscv32 ilp32f is unsupported)
 else ifeq (${CONFIG_ARCH_DPFPU}, y)
-  CFLAGS += -DBUILD_TARGET_RISCV64_ILP32D
-  INVOKE_NATIVE += invokeNative_riscv32_ilp32d.s
+  CFLAGS += -DBUILD_TARGET_RISCV32_ILP32D
 else
-  CFLAGS += -DBUILD_TARGET_RISCV64_ILP32
-  INVOKE_NATIVE += invokeNative_riscv32_ilp32.s
+  CFLAGS += -DBUILD_TARGET_RISCV32_ILP32
 endif
 
-  AOT_RELOC :=
+  INVOKE_NATIVE += invokeNative_riscv.S
+  AOT_RELOC := aot_reloc_riscv.c
 
 else
   $(error Build target is unsupported)
@@ -182,7 +180,8 @@ CFLAGS += -Wno-strict-prototypes -Wno-shadow -Wno-unused-variable
 CFLAGS += -Wno-int-conversion -Wno-implicit-function-declaration
 
 CFLAGS += -I${CORE_ROOT} \
-		      -I${IWASM_ROOT}/include \
+          -I${IWASM_ROOT}/include \
+          -I${IWASM_ROOT}/interpreter \
           -I${IWASM_ROOT}/common \
           -I${IWASM_ROOT}/libraries/thread-mgr \
           -I${SHARED_ROOT}/include \
@@ -218,7 +217,8 @@ CSRCS += nuttx_platform.c \
          wasm_runtime_common.c \
          wasm_native.c \
          wasm_exec_env.c \
-         wasm_memory.c
+         wasm_memory.c \
+         wasm_c_api.c
 
 ASRCS += ${INVOKE_NATIVE}
 

+ 10 - 1
wamr-compiler/CMakeLists.txt

@@ -68,6 +68,14 @@ elseif (WAMR_BUILD_TARGET MATCHES "AARCH64.*")
 elseif (WAMR_BUILD_TARGET MATCHES "ARM.*")
   add_definitions(-DBUILD_TARGET_ARM)
   add_definitions(-DBUILD_TARGET="${WAMR_BUILD_TARGET}")
+elseif (WAMR_BUILD_TARGET STREQUAL "RISCV64" OR WAMR_BUILD_TARGET STREQUAL "RISCV64_LP64D")
+  add_definitions(-DBUILD_TARGET_RISCV64_LP64D)
+elseif (WAMR_BUILD_TARGET STREQUAL "RISCV64_LP64")
+  add_definitions(-DBUILD_TARGET_RISCV64_LP64)
+elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32" OR WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32D")
+  add_definitions(-DBUILD_TARGET_RISCV32_ILP32D)
+elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32")
+  add_definitions(-DBUILD_TARGET_RISCV32_ILP32)
 else ()
   message (FATAL_ERROR "-- Build target isn't set")
 endif ()
@@ -75,7 +83,8 @@ endif ()
 message ("-- Build as target ${WAMR_BUILD_TARGET}")
 
 if (CMAKE_SIZEOF_VOID_P EQUAL 8)
-  if (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64" OR WAMR_BUILD_TARGET MATCHES "AARCH64.*")
+  if (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64"
+      OR WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "RISCV64.*")
     if (NOT WAMR_BUILD_PLATFORM STREQUAL "windows")
       # Add -fPIC flag if build as 64-bit
       set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")

+ 1 - 1
wamr-compiler/build_llvm.py

@@ -66,7 +66,7 @@ def main():
         cmd = 'cmake ../llvm \
                 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
                 -DCMAKE_BUILD_TYPE:STRING="Release" \
-                -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips" \
+                -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips;RISCV" \
                 -DLLVM_INCLUDE_GO_TESTS=OFF \
                 -DLLVM_INCLUDE_TOOLS=OFF \
                 -DLLVM_INCLUDE_UTILS=OFF \

+ 1 - 1
wamr-compiler/build_llvm.sh

@@ -27,7 +27,7 @@ if [ ! -f bin/llvm-lto ]; then
   cmake ../llvm \
           -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
           -DCMAKE_BUILD_TYPE:STRING="Release" \
-          -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips" \
+          -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips;RISCV" \
           -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \
           -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \
           -DLLVM_ENABLE_ZLIB:BOOL=OFF \

+ 1 - 1
wamr-compiler/build_llvm_xtensa.sh

@@ -27,7 +27,7 @@ if [ ! -f bin/llvm-lto ]; then
   cmake ../llvm \
           -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
           -DCMAKE_BUILD_TYPE:STRING="Release" \
-          -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips" \
+          -DLLVM_TARGETS_TO_BUILD:STRING="X86;ARM;AArch64;Mips;RISCV" \
           -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="Xtensa" \
           -DLLVM_BUILD_LLVM_DYLIB:BOOL=OFF \
           -DLLVM_OPTIMIZED_TABLEGEN:BOOL=ON \

+ 5 - 2
wamr-compiler/main.c

@@ -20,11 +20,14 @@ print_help()
 {
   printf("Usage: wamrc [options] -o output_file wasm_file\n");
   printf("  --target=<arch-name>      Set the target arch, which has the general format: <arch><sub>\n");
-  printf("                            <arch> = x86_64, i386, aarch64, arm, thumb, xtensa, mips.\n");
+  printf("                            <arch> = x86_64, i386, aarch64, arm, thumb, xtensa, mips,\n");
+  printf("                                     riscv64, riscv32.\n");
   printf("                              Default is host arch, e.g. x86_64\n");
   printf("                            <sub> = for ex. on arm or thumb: v5, v6m, v7a, v7m, etc.\n");
   printf("                            Use --target=help to list supported targets\n");
-  printf("  --target-abi=<abi>        Set the target ABI, e.g. gnu, eabi, gnueabihf, etc. (default: gnu)\n");
+  printf("  --target-abi=<abi>        Set the target ABI, e.g. gnu, eabi, gnueabihf, msvc, etc.\n");
+  printf("                              Default is gnu if target isn't riscv64 or riscv32\n");
+  printf("                              For target riscv64 and riscv32, default is lp64d and ilp32d\n");
   printf("                            Use --target-abi=help to list all the ABI supported\n");
   printf("  --cpu=<cpu>               Set the target CPU (default: host CPU, e.g. skylake)\n");
   printf("                            Use --cpu=help to list all the CPU supported\n");