sunnycase 6 лет назад
Родитель
Сommit
573a00dfc8

+ 42 - 1
.vscode/settings.json

@@ -13,6 +13,47 @@
         "type_traits": "cpp",
         "vector": "cpp",
         "xhash": "cpp",
-        "xstring": "cpp"
+        "xstring": "cpp",
+        "xutility": "c",
+        "chrono": "cpp",
+        "algorithm": "cpp",
+        "cmath": "cpp",
+        "concepts": "cpp",
+        "cstddef": "cpp",
+        "cstdint": "cpp",
+        "cstdio": "cpp",
+        "cstdlib": "cpp",
+        "cstring": "cpp",
+        "cwchar": "cpp",
+        "exception": "cpp",
+        "forward_list": "cpp",
+        "fstream": "cpp",
+        "ios": "cpp",
+        "iosfwd": "cpp",
+        "iostream": "cpp",
+        "istream": "cpp",
+        "iterator": "cpp",
+        "limits": "cpp",
+        "memory": "cpp",
+        "new": "cpp",
+        "optional": "cpp",
+        "ostream": "cpp",
+        "ratio": "cpp",
+        "stdexcept": "cpp",
+        "streambuf": "cpp",
+        "system_error": "cpp",
+        "tuple": "cpp",
+        "typeinfo": "cpp",
+        "unordered_map": "cpp",
+        "unordered_set": "cpp",
+        "utility": "cpp",
+        "xfacet": "cpp",
+        "xiosbase": "cpp",
+        "xlocale": "cpp",
+        "xlocinfo": "cpp",
+        "xlocnum": "cpp",
+        "xmemory": "cpp",
+        "xstddef": "cpp",
+        "xtr1common": "cpp"
     }
 }

+ 28 - 10
lib/CMakeLists.txt

@@ -2,22 +2,39 @@
 
 # create driver library
 
+ADD_SUBDIRECTORY(nncase) # Process the nncase/ subdirectory's own CMakeLists.txt; it is expected to define the `nncase` target that kendryte links against.
+
 FILE(GLOB_RECURSE LIB_SRC
-        "${CMAKE_CURRENT_LIST_DIR}/*.h"
-        "${CMAKE_CURRENT_LIST_DIR}/*.hpp"
-        "${CMAKE_CURRENT_LIST_DIR}/*.c"
-        "${CMAKE_CURRENT_LIST_DIR}/*.cpp"
-        "${CMAKE_CURRENT_LIST_DIR}/*.s"
-        "${CMAKE_CURRENT_LIST_DIR}/*.S"
+        "${CMAKE_CURRENT_LIST_DIR}/bsp/*.h" # Patterns are limited to bsp/, drivers/ and utils/, so files under nncase/ are no longer globbed into LIB_SRC.
+        "${CMAKE_CURRENT_LIST_DIR}/drivers/*.h"
+        "${CMAKE_CURRENT_LIST_DIR}/utils/*.h"
+        "${CMAKE_CURRENT_LIST_DIR}/bsp/*.hpp"
+        "${CMAKE_CURRENT_LIST_DIR}/drivers/*.hpp"
+        "${CMAKE_CURRENT_LIST_DIR}/utils/*.hpp"
+        "${CMAKE_CURRENT_LIST_DIR}/bsp/*.c"
+        "${CMAKE_CURRENT_LIST_DIR}/drivers/*.c"
+        "${CMAKE_CURRENT_LIST_DIR}/utils/*.c"
+        "${CMAKE_CURRENT_LIST_DIR}/bsp/*.cpp"
+        "${CMAKE_CURRENT_LIST_DIR}/drivers/*.cpp"
+        "${CMAKE_CURRENT_LIST_DIR}/utils/*.cpp"
+        "${CMAKE_CURRENT_LIST_DIR}/bsp/*.s"
+        "${CMAKE_CURRENT_LIST_DIR}/drivers/*.s"
+        "${CMAKE_CURRENT_LIST_DIR}/utils/*.s"
+        "${CMAKE_CURRENT_LIST_DIR}/bsp/*.S"
+        "${CMAKE_CURRENT_LIST_DIR}/drivers/*.S"
+        "${CMAKE_CURRENT_LIST_DIR}/utils/*.S"
         )
 
 FILE(GLOB_RECURSE ASSEMBLY_FILES
-        "${CMAKE_CURRENT_LIST_DIR}/*.s"
-        "${CMAKE_CURRENT_LIST_DIR}/*.S"
+        "${CMAKE_CURRENT_LIST_DIR}/bsp/*.s" # Same three directories as LIB_SRC; this list receives the assembler-with-cpp compile flags set further down.
+        "${CMAKE_CURRENT_LIST_DIR}/drivers/*.s"
+        "${CMAKE_CURRENT_LIST_DIR}/utils/*.s"
+        "${CMAKE_CURRENT_LIST_DIR}/bsp/*.S"
+        "${CMAKE_CURRENT_LIST_DIR}/drivers/*.S"
+        "${CMAKE_CURRENT_LIST_DIR}/utils/*.S"
         )
 
-include_directories(${SDK_ROOT}/third_party/xtl/include)
-include_directories(${CMAKE_CURRENT_LIST_DIR}/drivers/include ${CMAKE_CURRENT_LIST_DIR}/bsp/include ${CMAKE_CURRENT_LIST_DIR}/nncase/include)
+include_directories(${CMAKE_CURRENT_LIST_DIR}/drivers/include ${CMAKE_CURRENT_LIST_DIR}/bsp/include) # The xtl and nncase include paths are no longer added here. NOTE(review): presumably exported by the nncase target instead -- confirm.
 
 SET_PROPERTY(SOURCE ${ASSEMBLY_FILES} PROPERTY LANGUAGE C)
 SET_SOURCE_FILES_PROPERTIES(${ASSEMBLY_FILES} PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp -D __riscv64")
@@ -25,4 +42,5 @@ SET_SOURCE_FILES_PROPERTIES(${ASSEMBLY_FILES} PROPERTIES COMPILE_FLAGS "-x assem
 ADD_LIBRARY(kendryte
         ${LIB_SRC}
         )
+TARGET_LINK_LIBRARIES(kendryte PUBLIC nncase) # PUBLIC: nncase is linked into kendryte and also propagated to every target that links kendryte.
 SET_TARGET_PROPERTIES(kendryte PROPERTIES LINKER_LANGUAGE C)

+ 760 - 760
lib/bsp/syscalls.c

@@ -1,760 +1,760 @@
-/* Copyright 2018 Canaan Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Enable kernel-mode log API */
-
-#include <errno.h>
-#include <limits.h>
-#include <machine/syscall.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/unistd.h>
-#include "atomic.h"
-#include "clint.h"
-#include "dump.h"
-#include "fpioa.h"
-#include "interrupt.h"
-#include "syscalls.h"
-#include "sysctl.h"
-#include "syslog.h"
-#include "util.h"
-#include "iomem.h"
-
-/**
- * @note       System call list
- *
- * See also riscv-newlib/libgloss/riscv/syscalls.c
- *
- * | System call      | Number |
- * |------------------|--------|
- * | SYS_exit         | 93     |
- * | SYS_exit_group   | 94     |
- * | SYS_getpid       | 172    |
- * | SYS_kill         | 129    |
- * | SYS_read         | 63     |
- * | SYS_write        | 64     |
- * | SYS_open         | 1024   |
- * | SYS_openat       | 56     |
- * | SYS_close        | 57     |
- * | SYS_lseek        | 62     |
- * | SYS_brk          | 214    |
- * | SYS_link         | 1025   |
- * | SYS_unlink       | 1026   |
- * | SYS_mkdir        | 1030   |
- * | SYS_chdir        | 49     |
- * | SYS_getcwd       | 17     |
- * | SYS_stat         | 1038   |
- * | SYS_fstat        | 80     |
- * | SYS_lstat        | 1039   |
- * | SYS_fstatat      | 79     |
- * | SYS_access       | 1033   |
- * | SYS_faccessat    | 48     |
- * | SYS_pread        | 67     |
- * | SYS_pwrite       | 68     |
- * | SYS_uname        | 160    |
- * | SYS_getuid       | 174    |
- * | SYS_geteuid      | 175    |
- * | SYS_getgid       | 176    |
- * | SYS_getegid      | 177    |
- * | SYS_mmap         | 222    |
- * | SYS_munmap       | 215    |
- * | SYS_mremap       | 216    |
- * | SYS_time         | 1062   |
- * | SYS_getmainvars  | 2011   |
- * | SYS_rt_sigaction | 134    |
- * | SYS_writev       | 66     |
- * | SYS_gettimeofday | 169    |
- * | SYS_times        | 153    |
- * | SYS_fcntl        | 25     |
- * | SYS_getdents     | 61     |
- * | SYS_dup          | 23     |
- *
- */
-
-#ifndef UNUSED
-#define UNUSED(x) (void)(x)
-#endif
-
-static const char *TAG = "SYSCALL";
-
-extern char _heap_start[];
-extern char _heap_end[];
-char *_heap_cur = &_heap_start[0];
-char *_heap_line = &_heap_start[0];
-char *_ioheap_line = &_heap_end[0]-0x40000000;
-
-sys_putchar_t sys_putchar;
-sys_getchar_t sys_getchar;
-
-void sys_register_putchar(sys_putchar_t putchar)
-{
-    sys_putchar = putchar;
-}
-
-void sys_register_getchar(sys_getchar_t getchar)
-{
-    sys_getchar = getchar;
-}
-
-void sys_stdin_flush(void)
-{
-    if(sys_getchar)
-        while(sys_getchar() != EOF)
-            continue;
-}
-
-void __attribute__((noreturn)) sys_exit(int code)
-{
-    /* Read core id */
-    unsigned long core_id = current_coreid();
-    /* First print some diagnostic information. */
-    LOGW(TAG, "sys_exit called by core %ld with 0x%lx\r\n", core_id, (uint64_t)code);
-    while(1)
-        continue;
-}
-
-static int sys_nosys(long a0, long a1, long a2, long a3, long a4, long a5, unsigned long n)
-{
-    UNUSED(a3);
-    UNUSED(a4);
-    UNUSED(a5);
-
-    LOGE(TAG, "Unsupported syscall %ld: a0=%lx, a1=%lx, a2=%lx!\r\n", n, a0, a1, a2);
-    while(1)
-        continue;
-    return -ENOSYS;
-}
-
-static int sys_success(void)
-{
-    return 0;
-}
-
-static size_t sys_brk(size_t pos)
-{
-    uintptr_t res = 0;
-    /**
-     * brk() sets the end of the data segment to the value
-     * specified by addr, when that value is reasonable, the system
-     * has enough memory, and the process does not exceed its
-     * maximum data size.
-     *
-     * sbrk() increments the program's data space by increment
-     * bytes. Calling sbrk() with an increment of 0 can be used to
-     * find the current location of the program break.
-     *
-     * uintptr_t brk(uintptr_t ptr);
-     *
-     * IN : regs[10] = ptr
-     * OUT: regs[10] = ptr
-     */
-
-    /**
-     * First call: Initialization brk pointer. newlib will pass
-     * ptr = 0 when it is first called. In this case the address
-     * _heap_start will be return.
-     *
-     * Call again: Adjust brk pointer. The ptr never equal with
-     * 0. If ptr is below _heap_end, then allocate memory.
-     * Otherwise throw out of memory error, return -1.
-     */
-
-    if(pos)
-    {
-        /* Call again */
-        if((uintptr_t)pos > (uintptr_t)&_heap_end[0])
-        {
-            /* Memory out, return -ENOMEM */
-            LOGE(TAG, "Out of memory\r\n");
-            res = -ENOMEM;
-        } else
-        {
-            if((uintptr_t)pos > (uintptr_t)_heap_line)
-            {
-                _heap_line = (char *)(uintptr_t)pos;
-                if((uintptr_t)_heap_line-0x40000000 > (uintptr_t)_ioheap_line)
-                {
-                    LOGE(TAG, "Out of memory!\r\n");
-                    while(1)
-                        ;
-                }
-            }
-            /* Adjust brk pointer. */
-            _heap_cur = (char *)(uintptr_t)pos;
-            /* Return current address. */
-            res = (uintptr_t)_heap_cur;
-        }
-    } else
-    {
-        /* First call, return initial address */
-        res = (uintptr_t)&_heap_start[0];
-    }
-    return (size_t)res;
-}
-
-static ssize_t sys_write(int file, const void *ptr, size_t len)
-{
-    ssize_t res = -EBADF;
-
-    /**
-     * Write to a file.
-     *
-     * ssize_t write(int file, const void *ptr, size_t len)
-     *
-     * IN : regs[10] = file, regs[11] = ptr, regs[12] = len
-     * OUT: regs[10] = len
-     */
-
-    /* Get size to write */
-    register size_t length = len;
-    /* Get data pointer */
-    register char *data = (char *)ptr;
-
-    if(STDOUT_FILENO == file || STDERR_FILENO == file)
-    {
-        /* Write data */
-        while(length-- > 0 && data != NULL)
-        {
-            if(sys_putchar)
-                sys_putchar(*(data++));
-        }
-
-        /* Return the actual size written */
-        res = len;
-    } else
-    {
-        /* Not support yet */
-        res = -ENOSYS;
-    }
-
-    return res;
-}
-
-static ssize_t sys_read(int file, void *ptr, size_t len)
-{
-    ssize_t res = -EBADF;
-
-    /**
-     * Write from a file.
-     *
-     * ssize_t read(int file, void *ptr, size_t len)
-     *
-     * IN : regs[10] = file, regs[11] = ptr, regs[12] = len
-     * OUT: regs[10] = len
-     */
-
-    /* Get size to read */
-    register size_t length = len;
-    /* Get data pointer */
-    register char *data = (char *)ptr;
-    /* Actual size to read */
-    register size_t actual_length = 0;
-
-    if(STDIN_FILENO == file)
-    {
-        /* Read data */
-        actual_length = 0;
-        while(length-- > 0 && data != NULL)
-        {
-            if(sys_getchar)
-            {
-                int getchar_result = sys_getchar();
-                /* Get char until not EOF */
-                while(getchar_result == EOF)
-                    getchar_result = sys_getchar();
-                if(getchar_result != EOF)
-                {
-                    /* Not EOF, read data to buffer */
-                    *(data++) = (char)getchar_result;
-                    actual_length++;
-                    /* Echo back this char to user */
-                    if(sys_putchar)
-                        sys_putchar((char)getchar_result);
-                    /* User press RETURN, break. This is the last step in stdin */
-                    if((char)getchar_result == '\r')
-                        break;
-                    if((char)getchar_result == '\n')
-                        break;
-                } else
-                {
-                    /* EOF, do nothing */
-                }
-            }
-        }
-        /* Return the actual size read */
-        res = actual_length;
-    } else
-    {
-        /* Not support yet */
-        res = -ENOSYS;
-    }
-    return res;
-}
-
-static int sys_fstat(int file, struct stat *st)
-{
-    int res = -EBADF;
-
-    /**
-     * Status of an open file. The sys/stat.h header file required
-     * is
-     * distributed in the include subdirectory for this C library.
-     *
-     * int fstat(int file, struct stat* st)
-     *
-     * IN : regs[10] = file, regs[11] = st
-     * OUT: regs[10] = Upon successful completion, 0 shall be
-     * returned.
-     * Otherwise, -1 shall be returned and errno set to indicate
-     * the error.
-     */
-
-    UNUSED(file);
-
-    if(st != NULL)
-        memset(st, 0, sizeof(struct stat));
-    /* Return the result */
-    res = -ENOSYS;
-    /**
-     * Note: This value will return to syscall wrapper, syscall
-     * wrapper will set errno to ENOSYS and return -1
-     */
-
-    return res;
-}
-
-static int sys_close(int file)
-{
-    int res = -EBADF;
-
-    /**
-     * Close a file.
-     *
-     * int close(int file)
-     *
-     * IN : regs[10] = file
-     * OUT: regs[10] = Upon successful completion, 0 shall be
-     * returned.
-     * Otherwise, -1 shall be returned and errno set to indicate
-     * the error.
-     */
-
-    UNUSED(file);
-    /* Return the result */
-    res = 0;
-    return res;
-}
-
-static int sys_gettimeofday(struct timeval *tp, void *tzp)
-{
-    /**
-     * Get the current time.  Only relatively correct.
-     *
-     * int gettimeofday(struct timeval *tp, void *tzp)
-     *
-     * IN : regs[10] = tp
-     * OUT: regs[10] = Upon successful completion, 0 shall be
-     * returned.
-     * Otherwise, -1 shall be returned and errno set to indicate
-     * the error.
-     */
-    UNUSED(tzp);
-
-    if(tp != NULL)
-    {
-        uint64_t clint_usec = clint->mtime / (sysctl_clock_get_freq(SYSCTL_CLOCK_CPU) / CLINT_CLOCK_DIV / 1000000UL);
-
-        tp->tv_sec = clint_usec / 1000000UL;
-        tp->tv_usec = clint_usec % 1000000UL;
-    }
-    /* Return the result */
-    return 0;
-}
-
-uintptr_t __attribute__((weak))
-handle_ecall(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-    UNUSED(cause);
-    UNUSED(fregs);
-    enum syscall_id_e
-    {
-        SYS_ID_NOSYS,
-        SYS_ID_SUCCESS,
-        SYS_ID_EXIT,
-        SYS_ID_BRK,
-        SYS_ID_WRITE,
-        SYS_ID_READ,
-        SYS_ID_FSTAT,
-        SYS_ID_CLOSE,
-        SYS_ID_GETTIMEOFDAY,
-        SYS_ID_MAX
-    };
-
-    static uintptr_t (*const syscall_table[])(long a0, long a1, long a2, long a3, long a4, long a5, unsigned long n) =
-        {
-            [SYS_ID_NOSYS] = (void *)sys_nosys,
-            [SYS_ID_SUCCESS] = (void *)sys_success,
-            [SYS_ID_EXIT] = (void *)sys_exit,
-            [SYS_ID_BRK] = (void *)sys_brk,
-            [SYS_ID_WRITE] = (void *)sys_write,
-            [SYS_ID_READ] = (void *)sys_read,
-            [SYS_ID_FSTAT] = (void *)sys_fstat,
-            [SYS_ID_CLOSE] = (void *)sys_close,
-            [SYS_ID_GETTIMEOFDAY] = (void *)sys_gettimeofday,
-        };
-
-#if defined(__GNUC__)
-#pragma GCC diagnostic ignored "-Woverride-init"
-#endif
-    static const uint8_t syscall_id_table[0x100] =
-        {
-            [0x00 ... 0xFF] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_exit] = SYS_ID_EXIT,
-            [0xFF &
-                SYS_exit_group] = SYS_ID_EXIT,
-            [0xFF &
-                SYS_getpid] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_kill] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_read] = SYS_ID_READ,
-            [0xFF &
-                SYS_write] = SYS_ID_WRITE,
-            [0xFF &
-                SYS_open] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_openat] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_close] = SYS_ID_CLOSE,
-            [0xFF &
-                SYS_lseek] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_brk] = SYS_ID_BRK,
-            [0xFF &
-                SYS_link] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_unlink] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_mkdir] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_chdir] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_getcwd] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_stat] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_fstat] = SYS_ID_FSTAT,
-            [0xFF &
-                SYS_lstat] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_fstatat] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_access] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_faccessat] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_pread] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_pwrite] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_uname] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_getuid] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_geteuid] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_getgid] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_getegid] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_mmap] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_munmap] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_mremap] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_time] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_getmainvars] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_rt_sigaction] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_writev] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_gettimeofday] = SYS_ID_GETTIMEOFDAY,
-            [0xFF &
-                SYS_times] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_fcntl] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_getdents] = SYS_ID_NOSYS,
-            [0xFF &
-                SYS_dup] = SYS_ID_NOSYS,
-        };
-#if defined(__GNUC__)
-#pragma GCC diagnostic warning "-Woverride-init"
-#endif
-
-    regs[10] = syscall_table[syscall_id_table[0xFF & regs[17]]](
-        regs[10], /* a0 */
-        regs[11], /* a1 */
-        regs[12], /* a2 */
-        regs[13], /* a3 */
-        regs[14], /* a4 */
-        regs[15], /* a5 */
-        regs[17]  /* n */
-    );
-
-    return epc + 4;
-}
-
-uintptr_t __attribute__((weak, alias("handle_ecall")))
-handle_ecall_u(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]);
-
-uintptr_t __attribute__((weak, alias("handle_ecall")))
-handle_ecall_h(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]);
-
-uintptr_t __attribute__((weak, alias("handle_ecall")))
-handle_ecall_s(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]);
-
-uintptr_t __attribute__((weak, alias("handle_ecall")))
-handle_ecall_m(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]);
-
-uintptr_t __attribute__((weak))
-handle_misaligned_fetch(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-    dump_core("misaligned fetch", cause, epc, regs, fregs);
-    sys_exit(1337);
-    return epc;
-}
-
-uintptr_t __attribute__((weak))
-handle_fault_fetch(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-    dump_core("fault fetch", cause, epc, regs, fregs);
-    sys_exit(1337);
-    return epc;
-}
-
-uintptr_t __attribute__((weak))
-handle_illegal_instruction(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-    dump_core("illegal instruction", cause, epc, regs, fregs);
-    sys_exit(1337);
-    return epc;
-}
-
-uintptr_t __attribute__((weak))
-handle_breakpoint(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-    dump_core("breakpoint", cause, epc, regs, fregs);
-    sys_exit(1337);
-    return epc;
-}
-
-uintptr_t __attribute__((weak))
-handle_misaligned_load(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-    /* notice this function only support 16bit or 32bit instruction */
-
-    bool compressed = (*(unsigned short *)epc & 3) != 3;
-    bool fpu = 0;           /* load to fpu ? */
-    uintptr_t addr = 0;     /* src addr */
-    uint8_t src = 0;        /* src register */
-    uint8_t dst = 0;        /* dst register */
-    uint8_t len = 0;        /* data length */
-    int offset = 0;         /* addr offset to addr in reg */
-    bool unsigned_ = 0;     /* unsigned */
-    uint64_t data_load = 0; /* real data load */
-
-    if(compressed)
-    {
-        /* compressed instruction should not get this fault. */
-        goto on_error;
-    } else
-    {
-        uint32_t instruct = *(uint32_t *)epc;
-        uint8_t opcode = instruct & 0x7F;
-
-        dst = (instruct >> 7) & 0x1F;
-        len = (instruct >> 12) & 3;
-        unsigned_ = (instruct >> 14) & 1;
-        src = (instruct >> 15) & 0x1F;
-        offset = (instruct >> 20);
-        len = 1 << len;
-        switch(opcode)
-        {
-            case 3: /* load */
-                break;
-            case 7: /* fpu load */
-                fpu = 1;
-                break;
-            default:
-                goto on_error;
-        }
-    }
-
-    if(offset >> 11)
-        offset = -((offset & 0x3FF) + 1);
-
-    addr = (uint64_t)((uint64_t)regs[src] + offset);
-
-    for(int i = 0; i < len; ++i)
-        data_load |= ((uint64_t) * ((uint8_t *)addr + i)) << (8 * i);
-
-    if(!unsigned_ & !fpu)
-    {
-        /* adjust sign */
-        switch(len)
-        {
-            case 1:
-                data_load = (uint64_t)(int64_t)((int8_t)data_load);
-                break;
-            case 2:
-                data_load = (uint64_t)(int64_t)((int16_t)data_load);
-                break;
-            case 4:
-                data_load = (uint64_t)(int64_t)((int32_t)data_load);
-                break;
-            default:
-                break;
-        }
-    }
-
-    if(fpu)
-        fregs[dst] = data_load;
-    else
-        regs[dst] = data_load;
-
-    LOGV(TAG, "misaligned load recovered at %08lx. len:%02d,addr:%08lx,reg:%02d,data:%016lx,signed:%1d,float:%1d", (uint64_t)epc, len, (uint64_t)addr, dst, data_load, !unsigned_, fpu);
-
-    return epc + (compressed ? 2 : 4);
-on_error:
-    dump_core("misaligned load", cause, epc, regs, fregs);
-    sys_exit(1337);
-    return epc;
-}
-
-uintptr_t __attribute__((weak))
-handle_fault_load(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-    dump_core("fault load", cause, epc, regs, fregs);
-    sys_exit(1337);
-    return epc;
-}
-
-uintptr_t __attribute__((weak))
-handle_misaligned_store(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-    /* notice this function only support 16bit or 32bit instruction */
-
-    bool compressed = (*(unsigned short *)epc & 3) != 3;
-    bool fpu = 0;            /* store to fpu*/
-    uintptr_t addr = 0;      /* src addr*/
-    uint8_t src = 0;         /* src register*/
-    uint8_t dst = 0;         /* dst register*/
-    uint8_t len = 0;         /* data length*/
-    int offset = 0;          /* addr offset to addr in reg*/
-    uint64_t data_store = 0; /* real data store*/
-
-    if(compressed)
-    {
-        /* compressed instruction should not get this fault. */
-        goto on_error;
-    } else
-    {
-        uint32_t instruct = *(uint32_t *)epc;
-        uint8_t opcode = instruct & 0x7F;
-
-        len = (instruct >> 12) & 7;
-        dst = (instruct >> 15) & 0x1F;
-        src = (instruct >> 20) & 0x1F;
-        offset = ((instruct >> 7) & 0x1F) | ((instruct >> 20) & 0xFE0);
-        len = 1 << len;
-        switch(opcode)
-        {
-            case 0x23: /* store */
-                break;
-            case 0x27: /* fpu store */
-                fpu = 1;
-                break;
-            default:
-                goto on_error;
-        }
-    }
-
-    if(offset >> 11)
-        offset = -((offset & 0x3FF) + 1);
-
-    addr = (uint64_t)((uint64_t)regs[dst] + offset);
-
-    if(fpu)
-        data_store = fregs[src];
-    else
-        data_store = regs[src];
-
-    for(int i = 0; i < len; ++i)
-        *((uint8_t *)addr + i) = (data_store >> (i * 8)) & 0xFF;
-
-    LOGV(TAG, "misaligned store recovered at %08lx. len:%02d,addr:%08lx,reg:%02d,data:%016lx,float:%1d", (uint64_t)epc, len, (uint64_t)addr, src, data_store, fpu);
-
-    return epc + (compressed ? 2 : 4);
-on_error:
-    dump_core("misaligned store", cause, epc, regs, fregs);
-    sys_exit(1337);
-    return epc;
-}
-
-uintptr_t __attribute__((weak))
-handle_fault_store(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-    dump_core("fault store", cause, epc, regs, fregs);
-    sys_exit(1337);
-    return epc;
-}
-
-uintptr_t handle_syscall(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
-{
-
-    static uintptr_t (*const cause_table[])(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]) =
-        {
-            [CAUSE_MISALIGNED_FETCH] = handle_misaligned_fetch,
-            [CAUSE_FAULT_FETCH] = handle_fault_fetch,
-            [CAUSE_ILLEGAL_INSTRUCTION] = handle_illegal_instruction,
-            [CAUSE_BREAKPOINT] = handle_breakpoint,
-            [CAUSE_MISALIGNED_LOAD] = handle_misaligned_load,
-            [CAUSE_FAULT_LOAD] = handle_fault_load,
-            [CAUSE_MISALIGNED_STORE] = handle_misaligned_store,
-            [CAUSE_FAULT_STORE] = handle_fault_store,
-            [CAUSE_USER_ECALL] = handle_ecall_u,
-            [CAUSE_SUPERVISOR_ECALL] = handle_ecall_h,
-            [CAUSE_HYPERVISOR_ECALL] = handle_ecall_s,
-            [CAUSE_MACHINE_ECALL] = handle_ecall_m,
-        };
-
-    return cause_table[cause](cause, epc, regs, fregs);
-}
-
-size_t get_free_heap_size(void)
-{
-    return (size_t)iomem_unused();
-}
+/* Copyright 2018 Canaan Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Enable kernel-mode log API */
+
+#include <errno.h>
+#include <limits.h>
+#include <machine/syscall.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/unistd.h>
+#include "atomic.h"
+#include "clint.h"
+#include "dump.h"
+#include "fpioa.h"
+#include "interrupt.h"
+#include "syscalls.h"
+#include "sysctl.h"
+#include "syslog.h"
+#include "util.h"
+#include "iomem.h"
+
+/**
+ * @note       System call list
+ *
+ * See also riscv-newlib/libgloss/riscv/syscalls.c
+ *
+ * | System call      | Number |
+ * |------------------|--------|
+ * | SYS_exit         | 93     |
+ * | SYS_exit_group   | 94     |
+ * | SYS_getpid       | 172    |
+ * | SYS_kill         | 129    |
+ * | SYS_read         | 63     |
+ * | SYS_write        | 64     |
+ * | SYS_open         | 1024   |
+ * | SYS_openat       | 56     |
+ * | SYS_close        | 57     |
+ * | SYS_lseek        | 62     |
+ * | SYS_brk          | 214    |
+ * | SYS_link         | 1025   |
+ * | SYS_unlink       | 1026   |
+ * | SYS_mkdir        | 1030   |
+ * | SYS_chdir        | 49     |
+ * | SYS_getcwd       | 17     |
+ * | SYS_stat         | 1038   |
+ * | SYS_fstat        | 80     |
+ * | SYS_lstat        | 1039   |
+ * | SYS_fstatat      | 79     |
+ * | SYS_access       | 1033   |
+ * | SYS_faccessat    | 48     |
+ * | SYS_pread        | 67     |
+ * | SYS_pwrite       | 68     |
+ * | SYS_uname        | 160    |
+ * | SYS_getuid       | 174    |
+ * | SYS_geteuid      | 175    |
+ * | SYS_getgid       | 176    |
+ * | SYS_getegid      | 177    |
+ * | SYS_mmap         | 222    |
+ * | SYS_munmap       | 215    |
+ * | SYS_mremap       | 216    |
+ * | SYS_time         | 1062   |
+ * | SYS_getmainvars  | 2011   |
+ * | SYS_rt_sigaction | 134    |
+ * | SYS_writev       | 66     |
+ * | SYS_gettimeofday | 169    |
+ * | SYS_times        | 153    |
+ * | SYS_fcntl        | 25     |
+ * | SYS_getdents     | 61     |
+ * | SYS_dup          | 23     |
+ *
+ */
+
+#ifndef UNUSED /* Keep any UNUSED definition supplied by an earlier header. */
+#define UNUSED(x) (void)(x)
+#endif
+
+static const char *TAG = "SYSCALL"; /* Tag passed to LOGW/LOGE by the functions in this file. */
+
+extern char _heap_start[]; /* Defined outside this file; the address sys_brk returns for a zero request. */
+extern char _heap_end[];   /* Defined outside this file; upper limit sys_brk accepts for the break. */
+char *_heap_cur = &_heap_start[0];  /* Current program break; updated by sys_brk. */
+char *_heap_line = &_heap_start[0]; /* Highest break sys_brk has accepted so far. */
+char *_ioheap_line = &_heap_end[0]-0x40000000; /* sys_brk compares (_heap_line - 0x40000000) against this. NOTE(review): presumably moved by the iomem allocator -- confirm. */
+
+sys_putchar_t sys_putchar; /* Output hook; no initializer here, set via sys_register_putchar(). */
+sys_getchar_t sys_getchar; /* Input hook; no initializer here, set via sys_register_getchar(). */
+
+void sys_register_putchar(sys_putchar_t putchar) /* Install the hook sys_write calls for each output byte. */
+{
+    sys_putchar = putchar; /* Overwrites whatever hook was registered before. */
+}
+
+void sys_register_getchar(sys_getchar_t getchar) /* Install the hook used to fetch input characters. */
+{
+    sys_getchar = getchar; /* Overwrites whatever hook was registered before. */
+}
+
+void sys_stdin_flush(void) /* Discard pending input: keep calling the input hook until it returns EOF. */
+{
+    if(sys_getchar) /* Does nothing when no input hook is registered. */
+        while(sys_getchar() != EOF)
+            continue;
+}
+
+void __attribute__((noreturn)) sys_exit(int code) /* Log the calling core and the exit code, then spin forever. */
+{
+    /* Read core id */
+    unsigned long core_id = current_coreid();
+    /* Log which core called sys_exit and the code it passed. */
+    LOGW(TAG, "sys_exit called by core %ld with 0x%lx\r\n", core_id, (uint64_t)code);
+    while(1) /* Never returns, as the noreturn attribute promises. */
+        continue;
+}
+
+static int sys_nosys(long a0, long a1, long a2, long a3, long a4, long a5, unsigned long n) /* Fallback for unsupported syscalls: log the number and first three arguments, then hang. */
+{
+    UNUSED(a3);
+    UNUSED(a4);
+    UNUSED(a5);
+
+    LOGE(TAG, "Unsupported syscall %ld: a0=%lx, a1=%lx, a2=%lx!\r\n", n, a0, a1, a2);
+    while(1) /* Spins forever, so the return statement below is never reached. */
+        continue;
+    return -ENOSYS;
+}
+
+static int sys_success(void) /* Stub that takes no arguments and always returns 0. */
+{
+    return 0;
+}
+
+static size_t sys_brk(size_t pos)
+{
+    uintptr_t res = 0;
+    /**
+     * brk() sets the end of the data segment to the value
+     * specified by addr, when that value is reasonable, the system
+     * has enough memory, and the process does not exceed its
+     * maximum data size.
+     *
+     * sbrk() increments the program's data space by increment
+     * bytes. Calling sbrk() with an increment of 0 can be used to
+     * find the current location of the program break.
+     *
+     * uintptr_t brk(uintptr_t ptr);
+     *
+     * IN : regs[10] = ptr
+     * OUT: regs[10] = resulting break address, or -ENOMEM
+     */
+
+    /**
+     * First call: initialize the brk pointer. newlib passes
+     * ptr = 0 on its first call; in that case the address of
+     * _heap_start is returned.
+     *
+     * Later calls: adjust the brk pointer (ptr is non-zero).
+     * If ptr does not exceed _heap_end the break is moved there;
+     * otherwise "Out of memory" is logged and -ENOMEM is returned.
+     */
+
+    if(pos)
+    {
+        /* Non-zero request: validate it against the end of the heap. */
+        if((uintptr_t)pos > (uintptr_t)&_heap_end[0])
+        {
+            /* Past _heap_end: report -ENOMEM (stored in the unsigned res). */
+            LOGE(TAG, "Out of memory\r\n");
+            res = -ENOMEM;
+        } else
+        {
+            if((uintptr_t)pos > (uintptr_t)_heap_line) /* Request raises the highest break seen so far. */
+            {
+                _heap_line = (char *)(uintptr_t)pos;
+                if((uintptr_t)_heap_line-0x40000000 > (uintptr_t)_ioheap_line)
+                {
+                    LOGE(TAG, "Out of memory!\r\n");
+                    while(1) /* Hang: the raised mark, shifted down by 0x40000000, went past _ioheap_line. */
+                        ;
+                }
+            }
+            /* Adjust brk pointer. */
+            _heap_cur = (char *)(uintptr_t)pos;
+            /* Return current address. */
+            res = (uintptr_t)_heap_cur;
+        }
+    } else
+    {
+        /* pos == 0: return the start of the heap; _heap_cur is not consulted. */
+        res = (uintptr_t)&_heap_start[0];
+    }
+    return (size_t)res;
+}
+
+static ssize_t sys_write(int file, const void *ptr, size_t len)
+{
+    /**
+     * Write to a file.
+     *
+     * ssize_t write(int file, const void *ptr, size_t len)
+     *
+     * IN : regs[10] = file, regs[11] = ptr, regs[12] = len
+     * OUT: regs[10] = len
+     *
+     * Only stdout and stderr are handled; any other descriptor yields
+     * -ENOSYS. For the handled descriptors len is always returned, even
+     * when ptr is NULL or no sys_putchar hook is installed.
+     */
+    const char *cursor = (const char *)ptr;
+    size_t remaining;
+
+    if(file != STDOUT_FILENO && file != STDERR_FILENO)
+    {
+        /* Not support yet */
+        return -ENOSYS;
+    }
+
+    /* Push the bytes one at a time through the sys_putchar hook. The hook
+     * is re-checked on every byte; without it the bytes are dropped. */
+    for(remaining = len; remaining > 0 && cursor != NULL; --remaining)
+    {
+        if(sys_putchar)
+        {
+            sys_putchar(*cursor);
+            ++cursor;
+        }
+    }
+
+    /* Return the actual size written */
+    return (ssize_t)len;
+}
+
+static ssize_t sys_read(int file, void *ptr, size_t len)
+{
+    /**
+     * Read from a file.
+     *
+     * ssize_t read(int file, void *ptr, size_t len)
+     *
+     * IN : regs[10] = file, regs[11] = ptr, regs[12] = len
+     * OUT: regs[10] = number of bytes stored in ptr
+     *
+     * Only stdin is handled; any other descriptor yields -ENOSYS.
+     * Characters are fetched through the sys_getchar hook, echoed back
+     * through sys_putchar when that hook is installed, and reading stops
+     * after len characters or right after a '\r' or '\n' has been stored.
+     * With ptr == NULL or no sys_getchar hook nothing is read and 0 is
+     * returned.
+     */
+    char *data = (char *)ptr;
+    /* Actual size read */
+    size_t actual_length = 0;
+
+    if(STDIN_FILENO != file)
+    {
+        /* Not support yet */
+        return -ENOSYS;
+    }
+
+    for(size_t remaining = len; remaining > 0 && data != NULL; --remaining)
+    {
+        int getchar_result;
+
+        /* No input hook: nothing can ever arrive, so stop instead of
+         * spinning through the remaining count. */
+        if(!sys_getchar)
+            break;
+
+        /* Poll until a real character arrives; EOF is treated as
+         * "nothing available yet", so this blocks. */
+        do
+        {
+            getchar_result = sys_getchar();
+        } while(getchar_result == EOF);
+
+        /* Store the character in the caller's buffer */
+        *(data++) = (char)getchar_result;
+        actual_length++;
+
+        /* Echo back this char to user */
+        if(sys_putchar)
+            sys_putchar((char)getchar_result);
+
+        /* User press RETURN, break. This is the last step in stdin */
+        if((char)getchar_result == '\r' || (char)getchar_result == '\n')
+            break;
+    }
+
+    /* Return the actual size read */
+    return (ssize_t)actual_length;
+}
+
+static int sys_fstat(int file, struct stat *st)
+{
+    /**
+     * Status of an open file. The sys/stat.h header file required
+     * is distributed in the include subdirectory for this C library.
+     *
+     * int fstat(int file, struct stat* st)
+     *
+     * IN : regs[10] = file, regs[11] = st
+     * OUT: regs[10] = Upon successful completion, 0 shall be
+     * returned.
+     * Otherwise, -1 shall be returned and errno set to indicate
+     * the error.
+     *
+     * This implementation never succeeds: it zero-fills *st when st is
+     * non-NULL and always answers -ENOSYS.
+     */
+    UNUSED(file);
+
+    /* Hand the caller a fully zeroed structure. */
+    if(st)
+        memset(st, 0, sizeof(*st));
+
+    /**
+     * Note: This value will return to syscall wrapper, syscall
+     * wrapper will set errno to ENOSYS and return -1
+     */
+    return -ENOSYS;
+}
+
+static int sys_close(int file)
+{
+    /**
+     * Close a file.
+     *
+     * int close(int file)
+     *
+     * IN : regs[10] = file
+     * OUT: regs[10] = Upon successful completion, 0 shall be
+     * returned.
+     * Otherwise, -1 shall be returned and errno set to indicate
+     * the error.
+     *
+     * Nothing is actually closed here: the descriptor is ignored and
+     * success (0) is reported unconditionally.
+     */
+    UNUSED(file);
+
+    return 0;
+}
+
+static int sys_gettimeofday(struct timeval *tp, void *tzp)
+{
+    /**
+     * Get the current time.  Only relatively correct: the value is the
+     * CLINT mtime counter converted to seconds/microseconds, not a
+     * calendar time.
+     *
+     * int gettimeofday(struct timeval *tp, void *tzp)
+     *
+     * IN : regs[10] = tp
+     * OUT: regs[10] = Upon successful completion, 0 shall be
+     * returned.
+     * Otherwise, -1 shall be returned and errno set to indicate
+     * the error.
+     *
+     * tzp is ignored. A NULL tp is accepted and leaves nothing written.
+     * This handler always returns 0.
+     */
+    UNUSED(tzp);
+
+    if(tp != NULL)
+    {
+        /* Timer ticks per second: CPU clock divided by CLINT_CLOCK_DIV. */
+        const uint64_t ticks_per_sec = sysctl_clock_get_freq(SYSCTL_CLOCK_CPU) / CLINT_CLOCK_DIV;
+        const uint64_t now = clint->mtime;
+
+        if(ticks_per_sec == 0)
+        {
+            /* No usable clock rate: report zero rather than divide by 0. */
+            tp->tv_sec = 0;
+            tp->tv_usec = 0;
+        } else
+        {
+            /*
+             * Split into whole seconds and the sub-second remainder
+             * before scaling. Dividing by a pre-truncated
+             * "ticks per microsecond" value (as was done previously)
+             * skews the result whenever the tick rate is not an exact
+             * multiple of 1 MHz and divides by zero below 1 MHz. The
+             * remainder is smaller than ticks_per_sec, so the
+             * multiplication by 1000000 cannot overflow 64 bits.
+             */
+            tp->tv_sec = now / ticks_per_sec;
+            tp->tv_usec = (now % ticks_per_sec) * 1000000UL / ticks_per_sec;
+        }
+    }
+    /* Return the result */
+    return 0;
+}
+
+uintptr_t __attribute__((weak))
+handle_ecall(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{ /* Weak default ecall handler. It takes the syscall number from regs[17],
+   * translates its low 8 bits to an internal handler id, calls that
+   * handler with regs[10]..regs[15] as arguments, stores the handler's
+   * result in regs[10] and returns epc + 4. cause and fregs are unused. */
+    UNUSED(cause);
+    UNUSED(fregs);
+    enum syscall_id_e
+    {
+        SYS_ID_NOSYS,
+        SYS_ID_SUCCESS,
+        SYS_ID_EXIT,
+        SYS_ID_BRK,
+        SYS_ID_WRITE,
+        SYS_ID_READ,
+        SYS_ID_FSTAT,
+        SYS_ID_CLOSE,
+        SYS_ID_GETTIMEOFDAY,
+        SYS_ID_MAX
+    }; /* internal handler ids; they index syscall_table below */
+
+    static uintptr_t (*const syscall_table[])(long a0, long a1, long a2, long a3, long a4, long a5, unsigned long n) =
+        {
+            [SYS_ID_NOSYS] = (void *)sys_nosys,
+            [SYS_ID_SUCCESS] = (void *)sys_success,
+            [SYS_ID_EXIT] = (void *)sys_exit,
+            [SYS_ID_BRK] = (void *)sys_brk,
+            [SYS_ID_WRITE] = (void *)sys_write,
+            [SYS_ID_READ] = (void *)sys_read,
+            [SYS_ID_FSTAT] = (void *)sys_fstat,
+            [SYS_ID_CLOSE] = (void *)sys_close,
+            [SYS_ID_GETTIMEOFDAY] = (void *)sys_gettimeofday,
+        }; /* NOTE(review): every handler is cast through void * and later
+            * called via this 7-argument pointer type, whatever its own
+            * signature is; this relies on the target calling convention. */
+
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Woverride-init"
+#endif
+    static const uint8_t syscall_id_table[0x100] =
+        { /* Maps (syscall number & 0xFF) to a handler id. The range entry
+           * first defaults every slot to SYS_ID_NOSYS and the specific
+           * entries after it override single slots, which is why
+           * -Woverride-init is silenced around this table. Only the low
+           * byte of the number is used, so syscall numbers that share a
+           * low byte share a slot. No entry maps to SYS_ID_SUCCESS. */
+            [0x00 ... 0xFF] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_exit] = SYS_ID_EXIT,
+            [0xFF &
+                SYS_exit_group] = SYS_ID_EXIT,
+            [0xFF &
+                SYS_getpid] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_kill] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_read] = SYS_ID_READ,
+            [0xFF &
+                SYS_write] = SYS_ID_WRITE,
+            [0xFF &
+                SYS_open] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_openat] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_close] = SYS_ID_CLOSE,
+            [0xFF &
+                SYS_lseek] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_brk] = SYS_ID_BRK,
+            [0xFF &
+                SYS_link] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_unlink] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_mkdir] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_chdir] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_getcwd] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_stat] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_fstat] = SYS_ID_FSTAT,
+            [0xFF &
+                SYS_lstat] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_fstatat] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_access] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_faccessat] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_pread] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_pwrite] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_uname] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_getuid] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_geteuid] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_getgid] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_getegid] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_mmap] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_munmap] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_mremap] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_time] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_getmainvars] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_rt_sigaction] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_writev] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_gettimeofday] = SYS_ID_GETTIMEOFDAY,
+            [0xFF &
+                SYS_times] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_fcntl] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_getdents] = SYS_ID_NOSYS,
+            [0xFF &
+                SYS_dup] = SYS_ID_NOSYS,
+        };
+#if defined(__GNUC__)
+#pragma GCC diagnostic warning "-Woverride-init"
+#endif
+
+    regs[10] = syscall_table[syscall_id_table[0xFF & regs[17]]](
+        regs[10], /* a0 */
+        regs[11], /* a1 */
+        regs[12], /* a2 */
+        regs[13], /* a3 */
+        regs[14], /* a4 */
+        regs[15], /* a5 */
+        regs[17]  /* n */
+    ); /* the handler's return value replaces a0 (regs[10]) */
+
+    return epc + 4; /* presumably steps past the 4-byte ecall instruction */
+}
+
+uintptr_t __attribute__((weak, alias("handle_ecall")))
+handle_ecall_u(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]); /* weak alias of handle_ecall; entry in handle_syscall's cause_table */
+
+uintptr_t __attribute__((weak, alias("handle_ecall")))
+handle_ecall_h(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]); /* weak alias of handle_ecall; entry in handle_syscall's cause_table */
+
+uintptr_t __attribute__((weak, alias("handle_ecall")))
+handle_ecall_s(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]); /* weak alias of handle_ecall; entry in handle_syscall's cause_table */
+
+uintptr_t __attribute__((weak, alias("handle_ecall")))
+handle_ecall_m(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]); /* weak alias of handle_ecall; entry in handle_syscall's cause_table */
+
+uintptr_t __attribute__((weak))
+handle_misaligned_fetch(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{ /* Weak default handler installed for CAUSE_MISALIGNED_FETCH in
+   * handle_syscall's cause_table: passes the trap arguments to dump_core()
+   * labelled "misaligned fetch", then calls sys_exit(1337). */
+    dump_core("misaligned fetch", cause, epc, regs, fregs);
+    sys_exit(1337);
+    return epc; /* only reached if sys_exit() returns; epc is left unchanged */
+}
+
+uintptr_t __attribute__((weak))
+handle_fault_fetch(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{ /* Weak default handler installed for CAUSE_FAULT_FETCH in
+   * handle_syscall's cause_table: passes the trap arguments to dump_core()
+   * labelled "fault fetch", then calls sys_exit(1337). */
+    dump_core("fault fetch", cause, epc, regs, fregs);
+    sys_exit(1337);
+    return epc; /* only reached if sys_exit() returns; epc is left unchanged */
+}
+
+uintptr_t __attribute__((weak))
+handle_illegal_instruction(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{ /* Weak default handler installed for CAUSE_ILLEGAL_INSTRUCTION in
+   * handle_syscall's cause_table: passes the trap arguments to dump_core()
+   * labelled "illegal instruction", then calls sys_exit(1337). */
+    dump_core("illegal instruction", cause, epc, regs, fregs);
+    sys_exit(1337);
+    return epc; /* only reached if sys_exit() returns; epc is left unchanged */
+}
+
+uintptr_t __attribute__((weak))
+handle_breakpoint(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{ /* Weak default handler installed for CAUSE_BREAKPOINT in
+   * handle_syscall's cause_table: passes the trap arguments to dump_core()
+   * labelled "breakpoint", then calls sys_exit(1337). */
+    dump_core("breakpoint", cause, epc, regs, fregs);
+    sys_exit(1337);
+    return epc; /* only reached if sys_exit() returns; epc is left unchanged */
+}
+
+uintptr_t __attribute__((weak))
+handle_misaligned_load(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{
+    /**
+     * Weak default handler for misaligned loads: emulates the faulting
+     * load one byte at a time and resumes after it.
+     *
+     * Only 32-bit encodings with opcode 0x03 (integer load) or 0x07
+     * (floating-point load) are emulated. Compressed (16-bit)
+     * instructions and every other opcode take the on_error path, which
+     * hands the trap to dump_core() and calls sys_exit(1337).
+     *
+     * IN : epc   = address of the faulting instruction
+     *      regs  = saved integer registers (read for the base address,
+     *              written with the result of an integer load)
+     *      fregs = saved floating-point registers (written with the
+     *              result of a floating-point load)
+     * OUT: epc + 4 after a successful emulation, epc on the error path
+     */
+
+    bool compressed = (*(unsigned short *)epc & 3) != 3;
+    bool fpu = 0;           /* load to fpu ? */
+    uintptr_t addr = 0;     /* src addr */
+    uint8_t src = 0;        /* src (base address) register */
+    uint8_t dst = 0;        /* dst register */
+    uint8_t len = 0;        /* data length in bytes */
+    int offset = 0;         /* signed addr offset to addr in reg */
+    bool unsigned_ = 0;     /* unsigned (zero-extending) load ? */
+    uint64_t data_load = 0; /* real data load */
+
+    if(compressed)
+    {
+        /* compressed instruction should not get this fault. */
+        goto on_error;
+    } else
+    {
+        uint32_t instruct = *(uint32_t *)epc;
+        uint8_t opcode = instruct & 0x7F;
+
+        dst = (instruct >> 7) & 0x1F;
+        len = (instruct >> 12) & 3;
+        unsigned_ = (instruct >> 14) & 1;
+        src = (instruct >> 15) & 0x1F;
+        /*
+         * The 12-bit immediate sits in instruct[31:20] and is a two's
+         * complement value: when bit 11 is set the offset is negative,
+         * i.e. the raw field minus 4096 (0xFFF -> -1, 0x800 -> -2048).
+         */
+        offset = (int)(instruct >> 20);
+        if(offset & 0x800)
+            offset -= 0x1000;
+        len = 1 << len;
+        switch(opcode)
+        {
+            case 3: /* load */
+                break;
+            case 7: /* fpu load */
+                fpu = 1;
+                break;
+            default:
+                goto on_error;
+        }
+    }
+
+    addr = (uint64_t)((uint64_t)regs[src] + offset);
+
+    /* Assemble the value from single bytes, lowest address first. */
+    for(int i = 0; i < len; ++i)
+        data_load |= ((uint64_t) * ((uint8_t *)addr + i)) << (8 * i);
+
+    if(!unsigned_ && !fpu)
+    {
+        /* adjust sign */
+        switch(len)
+        {
+            case 1:
+                data_load = (uint64_t)(int64_t)((int8_t)data_load);
+                break;
+            case 2:
+                data_load = (uint64_t)(int64_t)((int16_t)data_load);
+                break;
+            case 4:
+                data_load = (uint64_t)(int64_t)((int32_t)data_load);
+                break;
+            default:
+                break;
+        }
+    }
+
+    if(fpu)
+        fregs[dst] = data_load;
+    else
+        regs[dst] = data_load;
+
+    LOGV(TAG, "misaligned load recovered at %08lx. len:%02d,addr:%08lx,reg:%02d,data:%016lx,signed:%1d,float:%1d", (uint64_t)epc, len, (uint64_t)addr, dst, data_load, !unsigned_, fpu);
+
+    /* compressed is always false here, so this steps over 4 bytes. */
+    return epc + (compressed ? 2 : 4);
+on_error:
+    dump_core("misaligned load", cause, epc, regs, fregs);
+    sys_exit(1337);
+    return epc;
+}
+
+uintptr_t __attribute__((weak))
+handle_fault_load(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{ /* Weak default handler installed for CAUSE_FAULT_LOAD in
+   * handle_syscall's cause_table: passes the trap arguments to dump_core()
+   * labelled "fault load", then calls sys_exit(1337). */
+    dump_core("fault load", cause, epc, regs, fregs);
+    sys_exit(1337);
+    return epc; /* only reached if sys_exit() returns; epc is left unchanged */
+}
+
+uintptr_t __attribute__((weak))
+handle_misaligned_store(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{
+    /**
+     * Weak default handler for misaligned stores: emulates the faulting
+     * store one byte at a time and resumes after it.
+     *
+     * Only 32-bit encodings with opcode 0x23 (integer store) or 0x27
+     * (floating-point store) are emulated. Compressed (16-bit)
+     * instructions and every other opcode take the on_error path, which
+     * hands the trap to dump_core() and calls sys_exit(1337).
+     *
+     * IN : epc   = address of the faulting instruction
+     *      regs  = saved integer registers (base address, and the value
+     *              for an integer store)
+     *      fregs = saved floating-point registers (the value for a
+     *              floating-point store)
+     * OUT: epc + 4 after a successful emulation, epc on the error path
+     */
+
+    bool compressed = (*(unsigned short *)epc & 3) != 3;
+    bool fpu = 0;            /* store from fpu ? */
+    uintptr_t addr = 0;      /* target addr */
+    uint8_t src = 0;         /* register holding the value to store */
+    uint8_t dst = 0;         /* register holding the base address */
+    uint8_t len = 0;         /* data length in bytes */
+    int offset = 0;          /* signed addr offset to addr in reg */
+    uint64_t data_store = 0; /* real data store */
+
+    if(compressed)
+    {
+        /* compressed instruction should not get this fault. */
+        goto on_error;
+    } else
+    {
+        uint32_t instruct = *(uint32_t *)epc;
+        uint8_t opcode = instruct & 0x7F;
+
+        len = (instruct >> 12) & 7;
+        dst = (instruct >> 15) & 0x1F;
+        src = (instruct >> 20) & 0x1F;
+        /*
+         * The 12-bit immediate is split across instruct[11:7] (low five
+         * bits) and instruct[31:25] (high seven bits). It is a two's
+         * complement value: when bit 11 is set the offset is negative,
+         * i.e. the raw field minus 4096 (0xFFF -> -1, 0x800 -> -2048).
+         */
+        offset = (int)(((instruct >> 7) & 0x1F) | ((instruct >> 20) & 0xFE0));
+        if(offset & 0x800)
+            offset -= 0x1000;
+        len = 1 << len;
+        switch(opcode)
+        {
+            case 0x23: /* store */
+                break;
+            case 0x27: /* fpu store */
+                fpu = 1;
+                break;
+            default:
+                goto on_error;
+        }
+    }
+
+    addr = (uint64_t)((uint64_t)regs[dst] + offset);
+
+    if(fpu)
+        data_store = fregs[src];
+    else
+        data_store = regs[src];
+
+    /* Write the value out as single bytes, lowest address first. */
+    for(int i = 0; i < len; ++i)
+        *((uint8_t *)addr + i) = (data_store >> (i * 8)) & 0xFF;
+
+    LOGV(TAG, "misaligned store recovered at %08lx. len:%02d,addr:%08lx,reg:%02d,data:%016lx,float:%1d", (uint64_t)epc, len, (uint64_t)addr, src, data_store, fpu);
+
+    /* compressed is always false here, so this steps over 4 bytes. */
+    return epc + (compressed ? 2 : 4);
+on_error:
+    dump_core("misaligned store", cause, epc, regs, fregs);
+    sys_exit(1337);
+    return epc;
+}
+
+uintptr_t __attribute__((weak))
+handle_fault_store(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{ /* Weak default handler installed for CAUSE_FAULT_STORE in
+   * handle_syscall's cause_table: passes the trap arguments to dump_core()
+   * labelled "fault store", then calls sys_exit(1337). */
+    dump_core("fault store", cause, epc, regs, fregs);
+    sys_exit(1337);
+    return epc; /* only reached if sys_exit() returns; epc is left unchanged */
+}
+
+uintptr_t handle_syscall(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32])
+{
+    /**
+     * Trap dispatcher: looks up the handler registered for `cause` and
+     * forwards all four arguments to it.
+     *
+     * IN : cause = trap cause, used as the index into cause_table
+     *      epc   = address of the trapping instruction
+     *      regs / fregs = saved integer / floating-point registers
+     * OUT: the value returned by the selected handler (the address to
+     *      resume at)
+     *
+     * A cause with no registered handler (outside the table, or an empty
+     * slot) is treated like the other fatal traps in this file: the
+     * arguments go to dump_core() and sys_exit(1337) is called.
+     */
+    static uintptr_t (*const cause_table[])(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]) =
+        {
+            [CAUSE_MISALIGNED_FETCH] = handle_misaligned_fetch,
+            [CAUSE_FAULT_FETCH] = handle_fault_fetch,
+            [CAUSE_ILLEGAL_INSTRUCTION] = handle_illegal_instruction,
+            [CAUSE_BREAKPOINT] = handle_breakpoint,
+            [CAUSE_MISALIGNED_LOAD] = handle_misaligned_load,
+            [CAUSE_FAULT_LOAD] = handle_fault_load,
+            [CAUSE_MISALIGNED_STORE] = handle_misaligned_store,
+            [CAUSE_FAULT_STORE] = handle_fault_store,
+            [CAUSE_USER_ECALL] = handle_ecall_u,
+            /* _s serves supervisor ecalls and _h hypervisor ecalls. */
+            [CAUSE_SUPERVISOR_ECALL] = handle_ecall_s,
+            [CAUSE_HYPERVISOR_ECALL] = handle_ecall_h,
+            [CAUSE_MACHINE_ECALL] = handle_ecall_m,
+        };
+
+    /* Never index past the table or call through an empty slot. */
+    if(cause >= sizeof(cause_table) / sizeof(cause_table[0]) || cause_table[cause] == NULL)
+    {
+        dump_core("unhandled trap", cause, epc, regs, fregs);
+        sys_exit(1337);
+        return epc;
+    }
+
+    return cause_table[cause](cause, epc, regs, fregs);
+}
+
+size_t get_free_heap_size(void)
+{ /* Returns the value of iomem_unused() converted to size_t.
+   * NOTE(review): presumably the number of unused bytes between the heap
+   * and the I/O heap -- confirm against iomem_unused()'s definition. */
+    return (size_t)iomem_unused();
+}

+ 67 - 67
lib/drivers/fft.c

@@ -1,67 +1,67 @@
-/* Copyright 2018 Canaan Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <stddef.h>
-#include "dmac.h"
-#include "fft.h"
-#include "sysctl.h"
-#include "utils.h"
-
-static volatile fft_t *const fft = (volatile fft_t *)FFT_BASE_ADDR;
-
-static void fft_init(uint8_t point, uint8_t mode, uint16_t shift, uint8_t is_dma, uint8_t input_mode, uint8_t data_mode)
-{
-    fft->fft_ctrl.fft_point = point;
-    fft->fft_ctrl.fft_mode = mode;
-    fft->fft_ctrl.fft_shift = shift;
-    fft->fft_ctrl.dma_send = is_dma;
-    fft->fft_ctrl.fft_enable = 1;
-    fft->fft_ctrl.fft_input_mode = input_mode;
-    fft->fft_ctrl.fft_data_mode = data_mode;
-}
-
-void fft_complex_uint16_dma(dmac_channel_number_t dma_send_channel_num, dmac_channel_number_t dma_receive_channel_num,
-                            uint16_t shift, fft_direction_t direction, const uint64_t *input, size_t point_num, uint64_t *output)
-{
-    fft_point_t point = FFT_512;
-    switch(point_num)
-    {
-        case 512:
-            point = FFT_512;
-            break;
-        case 256:
-            point = FFT_256;
-            break;
-        case 128:
-            point = FFT_128;
-            break;
-        case 64:
-            point = FFT_64;
-            break;
-        default:
-            configASSERT(!"fft point error");
-            break;
-    }
-    sysctl_clock_enable(SYSCTL_CLOCK_FFT);
-    sysctl_reset(SYSCTL_RESET_FFT);
-    fft_init(point, direction, shift, 1, 0, 0);
-    sysctl_dma_select(dma_receive_channel_num, SYSCTL_DMA_SELECT_FFT_RX_REQ);
-    sysctl_dma_select(dma_send_channel_num, SYSCTL_DMA_SELECT_FFT_TX_REQ);
-    dmac_set_single_mode(dma_receive_channel_num, (void *)(&fft->fft_output_fifo), output, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
-                         DMAC_MSIZE_4, DMAC_TRANS_WIDTH_64, point_num >> 1);
-    dmac_set_single_mode(dma_send_channel_num, input, (void *)(&fft->fft_input_fifo), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
-                         DMAC_MSIZE_4, DMAC_TRANS_WIDTH_64, point_num >> 1);
-    dmac_wait_done(dma_send_channel_num);
-    dmac_wait_done(dma_receive_channel_num);
-}
+/* Copyright 2018 Canaan Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stddef.h>
+#include "dmac.h"
+#include "fft.h"
+#include "sysctl.h"
+#include "utils.h"
+
+static volatile fft_t *const fft = (volatile fft_t *)FFT_BASE_ADDR; /* file-local pointer to the FFT register block at FFT_BASE_ADDR */
+
+static void fft_init(uint8_t point, uint8_t mode, uint16_t shift, uint8_t is_dma, uint8_t input_mode, uint8_t data_mode)
+{ /* Programs the fields of the FFT control register (fft->fft_ctrl) one
+   * by one from the arguments, in the order written below. fft_enable is
+   * always set to 1, and it is written before the input/data mode fields.
+   * NOTE(review): the write order may matter to the hardware -- keep it
+   * unless the datasheet says otherwise. */
+    fft->fft_ctrl.fft_point = point;
+    fft->fft_ctrl.fft_mode = mode;
+    fft->fft_ctrl.fft_shift = shift;
+    fft->fft_ctrl.dma_send = is_dma;
+    fft->fft_ctrl.fft_enable = 1;
+    fft->fft_ctrl.fft_input_mode = input_mode;
+    fft->fft_ctrl.fft_data_mode = data_mode;
+}
+
+void fft_complex_uint16_dma(dmac_channel_number_t dma_send_channel_num, dmac_channel_number_t dma_receive_channel_num,
+                            uint16_t shift, fft_direction_t direction, const uint64_t *input, size_t point_num, uint64_t *output)
+{ /* Runs one FFT pass through the hardware block, using two DMA channels,
+   * and blocks until both transfers have completed.
+   *
+   * dma_send_channel_num    : DMA channel that feeds `input` into the FFT
+   *                           input FIFO
+   * dma_receive_channel_num : DMA channel that drains the FFT output FIFO
+   *                           into `output`
+   * shift, direction        : forwarded to fft_init() as its shift and
+   *                           mode arguments
+   * point_num               : transform size; 512, 256, 128 and 64 are
+   *                           recognised
+   * input / output          : each is transferred as point_num >> 1
+   *                           64-bit words */
+    fft_point_t point = FFT_512;
+    switch(point_num)
+    {
+        case 512:
+            point = FFT_512;
+            break;
+        case 256:
+            point = FFT_256;
+            break;
+        case 128:
+            point = FFT_128;
+            break;
+        case 64:
+            point = FFT_64;
+            break;
+        default:
+            configASSERT(!"fft point error"); /* if this returns, FFT_512 stays selected */
+            break;
+    }
+    sysctl_clock_enable(SYSCTL_CLOCK_FFT);
+    sysctl_reset(SYSCTL_RESET_FFT);
+    fft_init(point, direction, shift, 1, 0, 0); /* is_dma = 1, input_mode = 0, data_mode = 0 */
+    sysctl_dma_select(dma_receive_channel_num, SYSCTL_DMA_SELECT_FFT_RX_REQ);
+    sysctl_dma_select(dma_send_channel_num, SYSCTL_DMA_SELECT_FFT_TX_REQ);
+    dmac_set_single_mode(dma_receive_channel_num, (void *)(&fft->fft_output_fifo), output, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+                         DMAC_MSIZE_4, DMAC_TRANS_WIDTH_64, point_num >> 1); /* receive channel is set up before the send channel */
+    dmac_set_single_mode(dma_send_channel_num, input, (void *)(&fft->fft_input_fifo), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
+                         DMAC_MSIZE_4, DMAC_TRANS_WIDTH_64, point_num >> 1);
+    dmac_wait_done(dma_send_channel_num);
+    dmac_wait_done(dma_receive_channel_num);
+}

+ 697 - 697
lib/drivers/i2s.c

@@ -1,697 +1,697 @@
-/* Copyright 2018 Canaan Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <math.h>
-#include <stdint.h>
-#include <stdio.h>
-#include "i2s.h"
-#include "stdlib.h"
-#include "sysctl.h"
-#include "utils.h"
-
-volatile i2s_t *const i2s[3] =
-    {
-        (volatile i2s_t *)I2S0_BASE_ADDR,
-        (volatile i2s_t *)I2S1_BASE_ADDR,
-        (volatile i2s_t *)I2S2_BASE_ADDR};
-
-typedef struct _i2s_instance
-{
-    i2s_device_number_t i2s_num;
-    i2s_transfer_mode_t transfer_mode;
-    dmac_channel_number_t dmac_channel;
-    plic_instance_t i2s_int_instance;
-} i2s_instance_t;
-
-static i2s_instance_t g_i2s_send_instance[3];
-static i2s_instance_t g_i2s_recv_instance[3];
-
-static int i2s_recv_channel_enable(i2s_device_number_t device_num,
-                                   i2s_channel_num_t channel_num, uint32_t enable)
-{
-    rer_t u_rer;
-
-    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
-        return -1;
-    u_rer.reg_data = readl(&i2s[device_num]->channel[channel_num].rer);
-    u_rer.rer.rxchenx = enable;
-    writel(u_rer.reg_data, &i2s[device_num]->channel[channel_num].rer);
-    return 0;
-}
-
-static int i2s_transmit_channel_enable(i2s_device_number_t device_num,
-                                       i2s_channel_num_t channel_num, uint32_t enable)
-{
-    ter_t u_ter;
-
-    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
-        return -1;
-
-    u_ter.reg_data = readl(&i2s[device_num]->channel[channel_num].ter);
-    u_ter.ter.txchenx = enable;
-    writel(u_ter.reg_data, &i2s[device_num]->channel[channel_num].ter);
-    return 0;
-}
-
-static void i2s_receive_enable(i2s_device_number_t device_num, i2s_channel_num_t channel_num)
-{
-    irer_t u_irer;
-
-    u_irer.reg_data = readl(&i2s[device_num]->irer);
-    u_irer.irer.rxen = 1;
-    writel(u_irer.reg_data, &i2s[device_num]->irer);
-    /* Receiver block enable */
-
-    i2s_recv_channel_enable(device_num, channel_num, 1);
-    /* Receive channel enable */
-}
-
-static void i2s_transimit_enable(i2s_device_number_t device_num, i2s_channel_num_t channel_num)
-{
-    iter_t u_iter;
-
-    u_iter.reg_data = readl(&i2s[device_num]->iter);
-    u_iter.iter.txen = 1;
-    writel(u_iter.reg_data, &i2s[device_num]->iter);
-    /* Transmitter block enable */
-
-    i2s_transmit_channel_enable(device_num, channel_num, 1);
-    /* Transmit channel enable */
-}
-
-static void i2s_set_enable(i2s_device_number_t device_num, uint32_t enable)
-{
-    ier_t u_ier;
-
-    u_ier.reg_data = readl(&i2s[device_num]->ier);
-    u_ier.ier.ien = enable;
-    writel(u_ier.reg_data, &i2s[device_num]->ier);
-}
-
-static void i2s_disable_block(i2s_device_number_t device_num, i2s_transmit_t rxtx_mode)
-{
-    irer_t u_irer;
-    iter_t u_iter;
-
-    if(rxtx_mode == I2S_RECEIVER)
-    {
-        u_irer.reg_data = readl(&i2s[device_num]->irer);
-        u_irer.irer.rxen = 0;
-        writel(u_irer.reg_data, &i2s[device_num]->irer);
-        /* Receiver block disable */
-    } else
-    {
-        u_iter.reg_data = readl(&i2s[device_num]->iter);
-        u_iter.iter.txen = 0;
-        writel(u_iter.reg_data, &i2s[device_num]->iter);
-        /* Transmitter block disable */
-    }
-}
-
-static int i2s_set_rx_word_length(i2s_device_number_t device_num,
-                                  i2s_word_length_t word_length,
-                                  i2s_channel_num_t channel_num)
-{
-    rcr_tcr_t u_rcr;
-
-    if(word_length > RESOLUTION_32_BIT || word_length < IGNORE_WORD_LENGTH)
-        return -1;
-    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
-        return -1;
-
-    u_rcr.reg_data = readl(&i2s[device_num]->channel[channel_num].rcr);
-    u_rcr.rcr_tcr.wlen = word_length;
-    writel(u_rcr.reg_data, &i2s[device_num]->channel[channel_num].rcr);
-    return 0;
-}
-
-static int i2s_set_tx_word_length(i2s_device_number_t device_num,
-                                  i2s_word_length_t word_length,
-                                  i2s_channel_num_t channel_num)
-{
-    rcr_tcr_t u_tcr;
-
-    if(word_length > RESOLUTION_32_BIT || word_length < IGNORE_WORD_LENGTH)
-        return -1;
-    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
-        return -1;
-
-    u_tcr.reg_data = readl(&i2s[device_num]->channel[channel_num].tcr);
-    u_tcr.rcr_tcr.wlen = word_length;
-    writel(u_tcr.reg_data, &i2s[device_num]->channel[channel_num].tcr);
-    return 0;
-}
-
-static void i2s_master_configure(i2s_device_number_t device_num,
-                                 i2s_word_select_cycles_t word_select_size,
-                                 i2s_sclk_gating_cycles_t gating_cycles,
-                                 i2s_work_mode_t word_mode)
-{
-    configASSERT(!(word_select_size < SCLK_CYCLES_16 ||
-                   word_select_size > SCLK_CYCLES_32));
-    configASSERT(!(gating_cycles < NO_CLOCK_GATING ||
-                   gating_cycles > CLOCK_CYCLES_24));
-
-    ccr_t u_ccr;
-    cer_t u_cer;
-
-    u_ccr.reg_data = readl(&i2s[device_num]->ccr);
-    u_ccr.ccr.clk_word_size = word_select_size;
-    u_ccr.ccr.clk_gate = gating_cycles;
-    u_ccr.ccr.align_mode = word_mode;
-    writel(u_ccr.reg_data, &i2s[device_num]->ccr);
-
-    u_cer.reg_data = readl(&i2s[device_num]->cer);
-    u_cer.cer.clken = 1;
-    writel(u_cer.reg_data, &i2s[device_num]->cer);
-    /* Clock generation enable */
-}
-
-static int i2s_set_rx_threshold(i2s_device_number_t device_num,
-                                i2s_fifo_threshold_t threshold,
-                                i2s_channel_num_t channel_num)
-{
-    rfcr_t u_rfcr;
-
-    if(threshold < TRIGGER_LEVEL_1 || threshold > TRIGGER_LEVEL_16)
-        return -1;
-    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
-        return -1;
-
-    u_rfcr.reg_data = readl(&i2s[device_num]->channel[channel_num].rfcr);
-    u_rfcr.rfcr.rxchdt = threshold;
-    writel(u_rfcr.reg_data, &i2s[device_num]->channel[channel_num].rfcr);
-
-    return 0;
-}
-
-static int i2s_set_tx_threshold(i2s_device_number_t device_num,
-                                i2s_fifo_threshold_t threshold,
-                                i2s_channel_num_t channel_num)
-{
-    tfcr_t u_tfcr;
-
-    if(threshold < TRIGGER_LEVEL_1 || threshold > TRIGGER_LEVEL_16)
-        return -1;
-    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
-        return -1;
-
-    u_tfcr.reg_data = readl(&i2s[device_num]->channel[channel_num].tfcr);
-    u_tfcr.tfcr.txchet = threshold;
-    writel(u_tfcr.reg_data, &i2s[device_num]->channel[channel_num].tfcr);
-    return 0;
-}
-
-static int i2s_set_mask_interrupt(i2s_device_number_t device_num,
-                                  i2s_channel_num_t channel_num,
-                                  uint32_t rx_available_int, uint32_t rx_overrun_int,
-                                  uint32_t tx_empty_int, uint32_t tx_overrun_int)
-{
-    imr_t u_imr;
-
-    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
-        return -1;
-    u_imr.reg_data = readl(&i2s[device_num]->channel[channel_num].imr);
-
-    if(rx_available_int == 1)
-        u_imr.imr.rxdam = 1;
-    else
-        u_imr.imr.rxdam = 0;
-    if(rx_overrun_int == 1)
-        u_imr.imr.rxfom = 1;
-    else
-        u_imr.imr.rxfom = 0;
-
-    if(tx_empty_int == 1)
-        u_imr.imr.txfem = 1;
-    else
-        u_imr.imr.txfem = 0;
-    if(tx_overrun_int == 1)
-        u_imr.imr.txfom = 1;
-    else
-        u_imr.imr.txfom = 0;
-    writel(u_imr.reg_data, &i2s[device_num]->channel[channel_num].imr);
-    return 0;
-}
-
-static int i2s_transmit_dma_enable(i2s_device_number_t device_num, uint32_t enable)
-{
-    ccr_t u_ccr;
-
-    if(device_num >= I2S_DEVICE_MAX)
-        return -1;
-
-    u_ccr.reg_data = readl(&i2s[device_num]->ccr);
-    u_ccr.ccr.dma_tx_en = enable;
-    writel(u_ccr.reg_data, &i2s[device_num]->ccr);
-
-    return 0;
-}
-
-static int i2s_receive_dma_enable(i2s_device_number_t device_num, uint32_t enable)
-{
-    ccr_t u_ccr;
-
-    if(device_num >= I2S_DEVICE_MAX)
-        return -1;
-
-    u_ccr.reg_data = readl(&i2s[device_num]->ccr);
-    u_ccr.ccr.dma_rx_en = enable;
-    writel(u_ccr.reg_data, &i2s[device_num]->ccr);
-
-    return 0;
-}
-
-int i2s_set_dma_divide_16(i2s_device_number_t device_num, uint32_t enable)
-{
-    ccr_t u_ccr;
-
-    if(device_num >= I2S_DEVICE_MAX)
-        return -1;
-
-    u_ccr.reg_data = readl(&i2s[device_num]->ccr);
-    u_ccr.ccr.dma_divide_16 = enable;
-    writel(u_ccr.reg_data, &i2s[device_num]->ccr);
-
-    return 0;
-}
-
-int i2s_get_dma_divide_16(i2s_device_number_t device_num)
-{
-    if(device_num >= I2S_DEVICE_MAX)
-        return -1;
-    ccr_t u_ccr;
-    u_ccr.reg_data = readl(&i2s[device_num]->ccr);
-    return u_ccr.ccr.dma_divide_16;
-}
-
-int i2s_receive_data(i2s_device_number_t device_num, i2s_channel_num_t channel_num, uint64_t *buf, size_t buf_len)
-{
-    uint32_t i = 0;
-    isr_t u_isr;
-
-    readl(&i2s[device_num]->channel[channel_num].ror);
-    /*clear over run*/
-
-    for(i = 0; i < buf_len;)
-    {
-        u_isr.reg_data = readl(&i2s[device_num]->channel[channel_num].isr);
-        if(u_isr.isr.rxda == 1)
-        {
-            buf[i] = readl(&i2s[device_num]->channel[channel_num].left_rxtx);
-            buf[i] <<= 32;
-            buf[i++] |= readl(&i2s[device_num]->channel[channel_num].right_rxtx);
-        }
-    }
-    return 0;
-}
-
-void i2s_receive_data_dma(i2s_device_number_t device_num, uint32_t *buf,
-                          size_t buf_len, dmac_channel_number_t channel_num)
-{
-    dmac_wait_done(channel_num);
-    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_I2S0_RX_REQ + device_num * 2);
-    dmac_set_single_mode(channel_num, (void *)(&i2s[device_num]->rxdma), buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
-                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
-}
-
-int i2s_rx_to_tx(i2s_device_number_t device_src_num, i2s_device_number_t device_dest_num,
-                 size_t buf_len, dmac_channel_number_t channel_num)
-{
-    static uint8_t dmac_recv_flag[6] = {0, 0, 0, 0, 0, 0};
-    if(dmac_recv_flag[channel_num])
-        dmac_wait_done(channel_num);
-    else
-        dmac_recv_flag[channel_num] = 1;
-    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_I2S0_RX_REQ + device_src_num * 2);
-    dmac_set_single_mode(channel_num, (void *)(&i2s[device_src_num]->rxdma), (void *)(&i2s[device_dest_num]->txdma), DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
-                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
-    return 0;
-}
-
-int i2s_send_data(i2s_device_number_t device_num, i2s_channel_num_t channel_num, const uint8_t *pcm, size_t buf_len,
-                  size_t single_length)
-{
-    isr_t u_isr;
-    uint32_t left_buffer = 0;
-    uint32_t right_buffer = 0;
-    uint32_t i = 0;
-    uint32_t j = 0;
-    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
-        return -1;
-
-    buf_len = buf_len / (single_length / 8) / 2; /* sample num */
-    readl(&i2s[device_num]->channel[channel_num].tor);
-    /* read clear overrun flag */
-
-    for(j = 0; j < buf_len;)
-    {
-        u_isr.reg_data = readl(&i2s[device_num]->channel[channel_num].isr);
-        if(u_isr.isr.txfe == 1)
-        {
-            switch(single_length)
-            {
-                case 16:
-                    left_buffer = ((uint16_t *)pcm)[i++];
-                    right_buffer = ((uint16_t *)pcm)[i++];
-                    break;
-                case 24:
-                    left_buffer = 0;
-                    left_buffer |= pcm[i++];
-                    left_buffer |= pcm[i++] << 8;
-                    left_buffer |= pcm[i++] << 16;
-                    right_buffer = 0;
-                    right_buffer |= pcm[i++];
-                    right_buffer |= pcm[i++] << 8;
-                    right_buffer |= pcm[i++] << 16;
-                    break;
-                case 32:
-                    left_buffer = ((uint32_t *)pcm)[i++];
-                    right_buffer = ((uint32_t *)pcm)[i++];
-                    break;
-                default:
-                    left_buffer = pcm[i++];
-                    right_buffer = pcm[i++];
-                    break;
-            }
-            writel(left_buffer, &i2s[device_num]->channel[channel_num].left_rxtx);
-            writel(right_buffer, &i2s[device_num]->channel[channel_num].right_rxtx);
-            j++;
-        }
-    }
-    return 0;
-}
-
-void i2s_send_data_dma(i2s_device_number_t device_num, const void *buf, size_t buf_len, dmac_channel_number_t channel_num)
-{
-
-    dmac_wait_done(channel_num);
-    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_I2S0_TX_REQ + device_num * 2);
-    dmac_set_single_mode(channel_num, buf, (void *)(&i2s[device_num]->txdma), DMAC_ADDR_INCREMENT,
-                         DMAC_ADDR_NOCHANGE, DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
-}
-
-static void i2s_parse_voice(i2s_device_number_t device_num, uint32_t *buf, const uint8_t *pcm, size_t length, size_t bits_per_sample,
-                            uint8_t track_num, size_t *send_len)
-{
-    uint32_t i, j = 0;
-    *send_len = length * 2;
-    switch(bits_per_sample)
-    {
-        case 16:
-            for(i = 0; i < length; i++)
-            {
-                buf[2 * i] = ((uint16_t *)pcm)[i];
-                buf[2 * i + 1] = 0;
-            }
-            break;
-        case 24:
-            for(i = 0; i < length; i++)
-            {
-                buf[2 * i] = 0;
-                buf[2 * i] |= pcm[j++];
-                buf[2 * i] |= pcm[j++] << 8;
-                buf[2 * i] |= pcm[j++] << 16;
-                buf[2 * i + 1] = 0;
-                if(track_num == 2)
-                {
-                    buf[2 * i + 1] |= pcm[j++];
-                    buf[2 * i + 1] |= pcm[j++] << 8;
-                    buf[2 * i + 1] |= pcm[j++] << 16;
-                }
-            }
-            break;
-        case 32:
-        default:
-            for(i = 0; i < length; i++)
-            {
-                buf[2 * i] = ((uint32_t *)pcm)[i];
-                buf[2 * i + 1] = 0;
-            }
-            break;
-    }
-}
-
-void i2s_play(i2s_device_number_t device_num, dmac_channel_number_t channel_num,
-              const uint8_t *buf, size_t buf_len, size_t frame, size_t bits_per_sample, uint8_t track_num)
-{
-    const uint8_t *trans_buf;
-    uint32_t i;
-    size_t sample_cnt = buf_len / (bits_per_sample / 8) / track_num;
-    size_t frame_cnt = sample_cnt / frame;
-    size_t frame_remain = sample_cnt % frame;
-    i2s_set_dma_divide_16(device_num, 0);
-
-    if(bits_per_sample == 16 && track_num == 2)
-    {
-        i2s_set_dma_divide_16(device_num, 1);
-        for(i = 0; i < frame_cnt; i++)
-        {
-            trans_buf = buf + i * frame * (bits_per_sample / 8) * track_num;
-            i2s_send_data_dma(device_num, trans_buf, frame, channel_num);
-        }
-        if(frame_remain)
-        {
-            trans_buf = buf + frame_cnt * frame * (bits_per_sample / 8) * track_num;
-            i2s_send_data_dma(device_num, trans_buf, frame_remain, channel_num);
-        }
-    } else if(bits_per_sample == 32 && track_num == 2)
-    {
-        for(i = 0; i < frame_cnt; i++)
-        {
-            trans_buf = buf + i * frame * (bits_per_sample / 8) * track_num;
-            i2s_send_data_dma(device_num, trans_buf, frame * 2, channel_num);
-        }
-        if(frame_remain)
-        {
-            trans_buf = buf + frame_cnt * frame * (bits_per_sample / 8) * track_num;
-            i2s_send_data_dma(device_num, trans_buf, frame_remain * 2, channel_num);
-        }
-    } else
-    {
-        uint32_t *buff[2];
-        buff[0] = malloc(frame * 2 * sizeof(uint32_t) * 2);
-        buff[1] = buff[0] + frame * 2;
-        uint8_t flag = 0;
-        size_t send_len = 0;
-        for(i = 0; i < frame_cnt; i++)
-        {
-            trans_buf = buf + i * frame * (bits_per_sample / 8) * track_num;
-            i2s_parse_voice(device_num, buff[flag], trans_buf, frame, bits_per_sample, track_num, &send_len);
-            i2s_send_data_dma(device_num, buff[flag], send_len, channel_num);
-            flag = !flag;
-        }
-        if(frame_remain)
-        {
-            trans_buf = buf + frame_cnt * frame * (bits_per_sample / 8) * track_num;
-            i2s_parse_voice(device_num, buff[flag], trans_buf, frame_remain, bits_per_sample, track_num, &send_len);
-            i2s_send_data_dma(device_num, trans_buf, send_len, channel_num);
-        }
-        free(buff[0]);
-    }
-}
-
-static inline void i2s_set_sign_expand_en(i2s_device_number_t device_num, uint32_t enable)
-{
-    ccr_t u_ccr;
-    u_ccr.reg_data = readl(&i2s[device_num]->ccr);
-    u_ccr.ccr.sign_expand_en = enable;
-    writel(u_ccr.reg_data, &i2s[device_num]->ccr);
-}
-
-void i2s_rx_channel_config(i2s_device_number_t device_num,
-                           i2s_channel_num_t channel_num,
-                           i2s_word_length_t word_length,
-                           i2s_word_select_cycles_t word_select_size,
-                           i2s_fifo_threshold_t trigger_level,
-                           i2s_work_mode_t word_mode)
-{
-    i2s_recv_channel_enable(device_num, channel_num, 0);
-    /* Receive channel disable */
-
-    writel(0, &i2s[device_num]->channel[channel_num].ter);
-    /* disable tx */
-
-    writel(1, &i2s[device_num]->channel[channel_num].rff);
-    /* flash individual fifo */
-
-    writel(1, &i2s[device_num]->rxffr);
-    /* flush tx fifo*/
-
-    i2s_set_rx_word_length(device_num, word_length, channel_num);
-    /* Word buf_len is RESOLUTION_32_BIT */
-
-    i2s_master_configure(device_num,
-                         word_select_size, NO_CLOCK_GATING, word_mode);
-    /* word select size is 32 bits,no clock gating */
-
-    i2s_set_rx_threshold(device_num, trigger_level, channel_num);
-    /* Interrupt trigger when FIFO level is 8 */
-
-    readl(&i2s[device_num]->channel[channel_num].ror);
-    readl(&i2s[device_num]->channel[channel_num].tor);
-
-    i2s_recv_channel_enable(device_num, channel_num, 1);
-}
-
-void i2s_tx_channel_config(i2s_device_number_t device_num,
-                           i2s_channel_num_t channel_num,
-                           i2s_word_length_t word_length,
-                           i2s_word_select_cycles_t word_select_size,
-                           i2s_fifo_threshold_t trigger_level,
-                           i2s_work_mode_t word_mode)
-{
-    writel(0, &i2s[device_num]->channel[channel_num].rer);
-    /* disable rx */
-
-    i2s_transmit_channel_enable(device_num, channel_num, 0);
-    /* Transmit channel disable */
-
-    writel(1, &i2s[device_num]->txffr);
-    /* flush tx fifo */
-    writel(1, &i2s[device_num]->channel[channel_num].tff);
-    /* flush individual fifo */
-
-    i2s_set_tx_word_length(device_num, word_length, channel_num);
-    /* Word buf_len is RESOLUTION_16_BIT */
-
-    i2s_master_configure(device_num, word_select_size, NO_CLOCK_GATING, word_mode);
-    /* word select size is 16 bits,gating after 16 bit */
-
-    i2s_set_tx_threshold(device_num, trigger_level, channel_num);
-    /* Interrupt trigger when FIFO level is 8 */
-
-    i2s_transmit_channel_enable(device_num, channel_num, 1);
-}
-
-void i2s_init(i2s_device_number_t device_num, i2s_transmit_t rxtx_mode, uint32_t channel_mask)
-{
-    sysctl_clock_enable(SYSCTL_CLOCK_I2S0 + device_num);
-    sysctl_reset(SYSCTL_RESET_I2S0 + device_num);
-    sysctl_clock_set_threshold(SYSCTL_THRESHOLD_I2S0 + device_num, 7);
-    /*96k:5,44k:12,24k:23,22k:25 16k:35 sampling*/
-    /*sample rate*32bit*2 =75MHz/((N+1)*2) */
-    i2s_set_enable(device_num, 1);
-    i2s_disable_block(device_num, I2S_TRANSMITTER);
-    i2s_disable_block(device_num, I2S_RECEIVER);
-
-    if(rxtx_mode == I2S_TRANSMITTER)
-    {
-        for(int i = 0; i < 4; i++)
-        {
-            if((channel_mask & 0x3) == 0x3)
-            {
-                i2s_set_mask_interrupt(device_num, I2S_CHANNEL_0 + i, 1, 1, 1, 1);
-                i2s_transimit_enable(device_num, I2S_CHANNEL_0 + i);
-            } else
-            {
-                i2s_transmit_channel_enable(device_num, I2S_CHANNEL_0 + i, 0);
-            }
-            channel_mask >>= 2;
-        }
-        i2s_transmit_dma_enable(device_num, 1);
-    } else
-    {
-        for(int i = 0; i < 4; i++)
-        {
-            if((channel_mask & 0x3) == 0x3)
-            {
-                i2s_set_mask_interrupt(device_num, I2S_CHANNEL_0 + i, 1, 1, 1, 1);
-                i2s_receive_enable(device_num, I2S_CHANNEL_0 + i);
-            } else
-            {
-                i2s_recv_channel_enable(device_num, I2S_CHANNEL_0 + i, 0);
-            }
-            channel_mask >>= 2;
-        }
-        /* Set expand_en when receive */
-        i2s_set_sign_expand_en(device_num, 1);
-        i2s_receive_dma_enable(device_num, 1);
-    }
-}
-
-uint32_t i2s_set_sample_rate(i2s_device_number_t device_num, uint32_t sample_rate)
-{
-    ccr_t u_ccr;
-    uint32_t pll2_clock = 0;
-    pll2_clock = sysctl_pll_get_freq(SYSCTL_PLL2);
-
-    u_ccr.reg_data = readl(&i2s[device_num]->ccr);
-    /* 0x0 for 16sclk cycles, 0x1 for 24 sclk cycles 0x2 for 32 sclk */
-    uint32_t v_clk_word_size = (u_ccr.ccr.clk_word_size + 2) * 8;
-    uint32_t threshold = round(pll2_clock / (sample_rate * 2.0 * v_clk_word_size * 2.0) - 1);
-    sysctl_clock_set_threshold(SYSCTL_THRESHOLD_I2S0 + device_num, threshold);
-    return sysctl_clock_get_freq(SYSCTL_CLOCK_I2S0 + device_num);
-}
-
-int i2s_dmac_irq(void *ctx)
-{
-    i2s_instance_t *v_instance = (i2s_instance_t *)ctx;
-    dmac_irq_unregister(v_instance->dmac_channel);
-    if(v_instance->i2s_int_instance.callback)
-    {
-        v_instance->i2s_int_instance.callback(v_instance->i2s_int_instance.ctx);
-    }
-    return 0;
-}
-
-void i2s_handle_data_dma(i2s_device_number_t device_num, i2s_data_t data, plic_interrupt_t *cb)
-{
-    configASSERT(device_num < I2S_DEVICE_MAX);
-    if(data.transfer_mode == I2S_SEND)
-    {
-        configASSERT(data.tx_buf && data.tx_len);
-        if(!data.nowait_dma_idle)
-        {
-            dmac_wait_done(data.tx_channel);
-        }
-        if(cb)
-        {
-            g_i2s_send_instance[device_num].i2s_int_instance.callback = cb->callback;
-            g_i2s_send_instance[device_num].i2s_int_instance.ctx = cb->ctx;
-            g_i2s_send_instance[device_num].dmac_channel = data.tx_channel;
-            g_i2s_send_instance[device_num].transfer_mode = I2S_SEND;
-            dmac_irq_register(data.tx_channel, i2s_dmac_irq, &g_i2s_send_instance[device_num], cb->priority);
-        }
-        sysctl_dma_select((sysctl_dma_channel_t)data.tx_channel, SYSCTL_DMA_SELECT_I2S0_TX_REQ + device_num * 2);
-        dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&i2s[device_num]->txdma), DMAC_ADDR_INCREMENT,
-                             DMAC_ADDR_NOCHANGE, DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.tx_len);
-        if(!cb && data.wait_dma_done)
-        {
-            dmac_wait_done(data.tx_channel);
-        }
-    } else
-    {
-        configASSERT(data.rx_buf && data.rx_len);
-        if(!data.nowait_dma_idle)
-        {
-            dmac_wait_done(data.rx_channel);
-        }
-        if(cb)
-        {
-            g_i2s_recv_instance[device_num].i2s_int_instance.callback = cb->callback;
-            g_i2s_recv_instance[device_num].i2s_int_instance.ctx = cb->ctx;
-            g_i2s_recv_instance[device_num].dmac_channel = data.rx_channel;
-            g_i2s_recv_instance[device_num].transfer_mode = I2S_RECEIVE;
-            dmac_irq_register(data.rx_channel, i2s_dmac_irq, &g_i2s_recv_instance[device_num], cb->priority);
-        }
-        sysctl_dma_select((sysctl_dma_channel_t)data.rx_channel, SYSCTL_DMA_SELECT_I2S0_RX_REQ + device_num * 2);
-        dmac_set_single_mode(data.rx_channel, (void *)(&i2s[device_num]->rxdma), data.rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
-                             DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.rx_len);
-        if(!cb && data.wait_dma_done)
-        {
-            dmac_wait_done(data.rx_channel);
-        }
-    }
-}
+/* Copyright 2018 Canaan Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "i2s.h"
+#include "stdlib.h"
+#include "sysctl.h"
+#include "utils.h"
+
+/* Register blocks of the three I2S controllers, indexed by device number.
+ * Entry n points at I2Sn_BASE_ADDR; the pointers themselves are const, the
+ * registers they point to are volatile. */
+volatile i2s_t *const i2s[3] =
+    {
+        (volatile i2s_t *)I2S0_BASE_ADDR,
+        (volatile i2s_t *)I2S1_BASE_ADDR,
+        (volatile i2s_t *)I2S2_BASE_ADDR};
+
+/* Bookkeeping for one callback-driven DMA transfer on an I2S device. */
+typedef struct _i2s_instance
+{
+    /* I2S device number */
+    i2s_device_number_t i2s_num;
+    /* Direction of the transfer (this file stores I2S_SEND or I2S_RECEIVE) */
+    i2s_transfer_mode_t transfer_mode;
+    /* DMA channel the transfer runs on; i2s_dmac_irq unregisters its IRQ */
+    dmac_channel_number_t dmac_channel;
+    /* User callback and context that i2s_dmac_irq invokes */
+    plic_instance_t i2s_int_instance;
+} i2s_instance_t;
+
+/* One slot per I2S device (3 devices), kept separately for send and receive. */
+static i2s_instance_t g_i2s_send_instance[3];
+static i2s_instance_t g_i2s_recv_instance[3];
+
+/* Set or clear the receive-enable bit (RER.rxchenx) of one channel.
+ * Returns 0 on success, -1 if channel_num is outside I2S_CHANNEL_0..I2S_CHANNEL_3. */
+static int i2s_recv_channel_enable(i2s_device_number_t device_num,
+                                   i2s_channel_num_t channel_num, uint32_t enable)
+{
+    /* Reject an invalid channel before any register is touched. */
+    if(!(channel_num >= I2S_CHANNEL_0 && channel_num <= I2S_CHANNEL_3))
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    rer_t reg;
+
+    /* Read-modify-write so that only the rxchenx field changes. */
+    reg.reg_data = readl(&dev->channel[channel_num].rer);
+    reg.rer.rxchenx = enable;
+    writel(reg.reg_data, &dev->channel[channel_num].rer);
+    return 0;
+}
+
+/* Set or clear the transmit-enable bit (TER.txchenx) of one channel.
+ * Returns 0 on success, -1 if channel_num is outside I2S_CHANNEL_0..I2S_CHANNEL_3. */
+static int i2s_transmit_channel_enable(i2s_device_number_t device_num,
+                                       i2s_channel_num_t channel_num, uint32_t enable)
+{
+    /* Reject an invalid channel before any register is touched. */
+    if(!(channel_num >= I2S_CHANNEL_0 && channel_num <= I2S_CHANNEL_3))
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    ter_t reg;
+
+    /* Read-modify-write so that only the txchenx field changes. */
+    reg.reg_data = readl(&dev->channel[channel_num].ter);
+    reg.ter.txchenx = enable;
+    writel(reg.reg_data, &dev->channel[channel_num].ter);
+    return 0;
+}
+
+/* Enable the receiver block of a device, then enable one receive channel. */
+static void i2s_receive_enable(i2s_device_number_t device_num, i2s_channel_num_t channel_num)
+{
+    volatile i2s_t *const dev = i2s[device_num];
+    irer_t reg;
+
+    /* Receiver block enable: set IRER.rxen, leaving the other bits as read. */
+    reg.reg_data = readl(&dev->irer);
+    reg.irer.rxen = 1;
+    writel(reg.reg_data, &dev->irer);
+
+    /* Receive channel enable */
+    i2s_recv_channel_enable(device_num, channel_num, 1);
+}
+
+/* Enable the transmitter block of a device, then enable one transmit channel. */
+static void i2s_transimit_enable(i2s_device_number_t device_num, i2s_channel_num_t channel_num)
+{
+    volatile i2s_t *const dev = i2s[device_num];
+    iter_t reg;
+
+    /* Transmitter block enable: set ITER.txen, leaving the other bits as read. */
+    reg.reg_data = readl(&dev->iter);
+    reg.iter.txen = 1;
+    writel(reg.reg_data, &dev->iter);
+
+    /* Transmit channel enable */
+    i2s_transmit_channel_enable(device_num, channel_num, 1);
+}
+
+/* Write `enable` into the IER.ien field of the given I2S device. */
+static void i2s_set_enable(i2s_device_number_t device_num, uint32_t enable)
+{
+    volatile i2s_t *const dev = i2s[device_num];
+    ier_t reg;
+
+    /* Read-modify-write: only the ien field is changed. */
+    reg.reg_data = readl(&dev->ier);
+    reg.ier.ien = enable;
+    writel(reg.reg_data, &dev->ier);
+}
+
+/* Disable either the receiver block (rxtx_mode == I2S_RECEIVER) or, for any
+ * other mode value, the transmitter block of the given device. */
+static void i2s_disable_block(i2s_device_number_t device_num, i2s_transmit_t rxtx_mode)
+{
+    volatile i2s_t *const dev = i2s[device_num];
+
+    if(rxtx_mode != I2S_RECEIVER)
+    {
+        /* Transmitter block disable: clear ITER.txen. */
+        iter_t tx_reg;
+
+        tx_reg.reg_data = readl(&dev->iter);
+        tx_reg.iter.txen = 0;
+        writel(tx_reg.reg_data, &dev->iter);
+        return;
+    }
+
+    /* Receiver block disable: clear IRER.rxen. */
+    irer_t rx_reg;
+
+    rx_reg.reg_data = readl(&dev->irer);
+    rx_reg.irer.rxen = 0;
+    writel(rx_reg.reg_data, &dev->irer);
+}
+
+/* Program the receive word length (RCR.wlen) of one channel.
+ * Returns 0 on success, -1 if word_length is outside
+ * IGNORE_WORD_LENGTH..RESOLUTION_32_BIT or channel_num is outside
+ * I2S_CHANNEL_0..I2S_CHANNEL_3. */
+static int i2s_set_rx_word_length(i2s_device_number_t device_num,
+                                  i2s_word_length_t word_length,
+                                  i2s_channel_num_t channel_num)
+{
+    const int bad_length = (word_length > RESOLUTION_32_BIT) || (word_length < IGNORE_WORD_LENGTH);
+    const int bad_channel = (channel_num < I2S_CHANNEL_0) || (channel_num > I2S_CHANNEL_3);
+
+    if(bad_length || bad_channel)
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    rcr_tcr_t reg;
+
+    /* Read-modify-write so that only the wlen field changes. */
+    reg.reg_data = readl(&dev->channel[channel_num].rcr);
+    reg.rcr_tcr.wlen = word_length;
+    writel(reg.reg_data, &dev->channel[channel_num].rcr);
+    return 0;
+}
+
+/* Program the transmit word length (TCR.wlen) of one channel.
+ * Returns 0 on success, -1 if word_length is outside
+ * IGNORE_WORD_LENGTH..RESOLUTION_32_BIT or channel_num is outside
+ * I2S_CHANNEL_0..I2S_CHANNEL_3. */
+static int i2s_set_tx_word_length(i2s_device_number_t device_num,
+                                  i2s_word_length_t word_length,
+                                  i2s_channel_num_t channel_num)
+{
+    const int bad_length = (word_length > RESOLUTION_32_BIT) || (word_length < IGNORE_WORD_LENGTH);
+    const int bad_channel = (channel_num < I2S_CHANNEL_0) || (channel_num > I2S_CHANNEL_3);
+
+    if(bad_length || bad_channel)
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    rcr_tcr_t reg;
+
+    /* Read-modify-write so that only the wlen field changes. */
+    reg.reg_data = readl(&dev->channel[channel_num].tcr);
+    reg.rcr_tcr.wlen = word_length;
+    writel(reg.reg_data, &dev->channel[channel_num].tcr);
+    return 0;
+}
+
+/* Configure the clock section of a device: word-select width, clock gating
+ * and alignment mode go into CCR, then clock generation is switched on in CER.
+ * Both range arguments are checked with configASSERT. */
+static void i2s_master_configure(i2s_device_number_t device_num,
+                                 i2s_word_select_cycles_t word_select_size,
+                                 i2s_sclk_gating_cycles_t gating_cycles,
+                                 i2s_work_mode_t word_mode)
+{
+    configASSERT(!(word_select_size < SCLK_CYCLES_16 ||
+                   word_select_size > SCLK_CYCLES_32));
+    configASSERT(!(gating_cycles < NO_CLOCK_GATING ||
+                   gating_cycles > CLOCK_CYCLES_24));
+
+    volatile i2s_t *const dev = i2s[device_num];
+
+    /* Clock configuration register: update three fields in one write. */
+    ccr_t clk_cfg;
+
+    clk_cfg.reg_data = readl(&dev->ccr);
+    clk_cfg.ccr.clk_word_size = word_select_size;
+    clk_cfg.ccr.clk_gate = gating_cycles;
+    clk_cfg.ccr.align_mode = word_mode;
+    writel(clk_cfg.reg_data, &dev->ccr);
+
+    /* Clock generation enable */
+    cer_t clk_en;
+
+    clk_en.reg_data = readl(&dev->cer);
+    clk_en.cer.clken = 1;
+    writel(clk_en.reg_data, &dev->cer);
+}
+
+/* Program the receive FIFO trigger level (RFCR.rxchdt) of one channel.
+ * Returns 0 on success, -1 if threshold is outside
+ * TRIGGER_LEVEL_1..TRIGGER_LEVEL_16 or channel_num is outside
+ * I2S_CHANNEL_0..I2S_CHANNEL_3. */
+static int i2s_set_rx_threshold(i2s_device_number_t device_num,
+                                i2s_fifo_threshold_t threshold,
+                                i2s_channel_num_t channel_num)
+{
+    const int bad_threshold = (threshold < TRIGGER_LEVEL_1) || (threshold > TRIGGER_LEVEL_16);
+    const int bad_channel = (channel_num < I2S_CHANNEL_0) || (channel_num > I2S_CHANNEL_3);
+
+    if(bad_threshold || bad_channel)
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    rfcr_t reg;
+
+    /* Read-modify-write so that only the rxchdt field changes. */
+    reg.reg_data = readl(&dev->channel[channel_num].rfcr);
+    reg.rfcr.rxchdt = threshold;
+    writel(reg.reg_data, &dev->channel[channel_num].rfcr);
+    return 0;
+}
+
+/* Program the transmit FIFO trigger level (TFCR.txchet) of one channel.
+ * Returns 0 on success, -1 if threshold is outside
+ * TRIGGER_LEVEL_1..TRIGGER_LEVEL_16 or channel_num is outside
+ * I2S_CHANNEL_0..I2S_CHANNEL_3. */
+static int i2s_set_tx_threshold(i2s_device_number_t device_num,
+                                i2s_fifo_threshold_t threshold,
+                                i2s_channel_num_t channel_num)
+{
+    const int bad_threshold = (threshold < TRIGGER_LEVEL_1) || (threshold > TRIGGER_LEVEL_16);
+    const int bad_channel = (channel_num < I2S_CHANNEL_0) || (channel_num > I2S_CHANNEL_3);
+
+    if(bad_threshold || bad_channel)
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    tfcr_t reg;
+
+    /* Read-modify-write so that only the txchet field changes. */
+    reg.reg_data = readl(&dev->channel[channel_num].tfcr);
+    reg.tfcr.txchet = threshold;
+    writel(reg.reg_data, &dev->channel[channel_num].tfcr);
+    return 0;
+}
+
+/* Program the four interrupt mask bits (IMR) of one channel.
+ * For each argument, the value 1 sets the corresponding mask bit and any
+ * other value clears it.
+ * Returns 0 on success, -1 if channel_num is outside I2S_CHANNEL_0..I2S_CHANNEL_3. */
+static int i2s_set_mask_interrupt(i2s_device_number_t device_num,
+                                  i2s_channel_num_t channel_num,
+                                  uint32_t rx_available_int, uint32_t rx_overrun_int,
+                                  uint32_t tx_empty_int, uint32_t tx_overrun_int)
+{
+    if(!(channel_num >= I2S_CHANNEL_0 && channel_num <= I2S_CHANNEL_3))
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    imr_t mask;
+
+    mask.reg_data = readl(&dev->channel[channel_num].imr);
+
+    /* Receive side: data-available and FIFO-overrun masks. */
+    mask.imr.rxdam = (rx_available_int == 1) ? 1 : 0;
+    mask.imr.rxfom = (rx_overrun_int == 1) ? 1 : 0;
+
+    /* Transmit side: FIFO-empty and FIFO-overrun masks. */
+    mask.imr.txfem = (tx_empty_int == 1) ? 1 : 0;
+    mask.imr.txfom = (tx_overrun_int == 1) ? 1 : 0;
+
+    writel(mask.reg_data, &dev->channel[channel_num].imr);
+    return 0;
+}
+
+/* Write `enable` into CCR.dma_tx_en of the given device.
+ * Returns 0 on success, -1 if device_num is not below I2S_DEVICE_MAX. */
+static int i2s_transmit_dma_enable(i2s_device_number_t device_num, uint32_t enable)
+{
+    if(!(device_num < I2S_DEVICE_MAX))
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    ccr_t reg;
+
+    /* Read-modify-write: only the dma_tx_en field is changed. */
+    reg.reg_data = readl(&dev->ccr);
+    reg.ccr.dma_tx_en = enable;
+    writel(reg.reg_data, &dev->ccr);
+    return 0;
+}
+
+/* Write `enable` into CCR.dma_rx_en of the given device.
+ * Returns 0 on success, -1 if device_num is not below I2S_DEVICE_MAX. */
+static int i2s_receive_dma_enable(i2s_device_number_t device_num, uint32_t enable)
+{
+    if(!(device_num < I2S_DEVICE_MAX))
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    ccr_t reg;
+
+    /* Read-modify-write: only the dma_rx_en field is changed. */
+    reg.reg_data = readl(&dev->ccr);
+    reg.ccr.dma_rx_en = enable;
+    writel(reg.reg_data, &dev->ccr);
+    return 0;
+}
+
+/* Write `enable` into CCR.dma_divide_16 of the given device.
+ * Returns 0 on success, -1 if device_num is not below I2S_DEVICE_MAX. */
+int i2s_set_dma_divide_16(i2s_device_number_t device_num, uint32_t enable)
+{
+    if(!(device_num < I2S_DEVICE_MAX))
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    ccr_t reg;
+
+    /* Read-modify-write: only the dma_divide_16 field is changed. */
+    reg.reg_data = readl(&dev->ccr);
+    reg.ccr.dma_divide_16 = enable;
+    writel(reg.reg_data, &dev->ccr);
+    return 0;
+}
+
+/* Return the current CCR.dma_divide_16 field of the given device,
+ * or -1 if device_num is not below I2S_DEVICE_MAX. */
+int i2s_get_dma_divide_16(i2s_device_number_t device_num)
+{
+    ccr_t reg;
+
+    if(!(device_num < I2S_DEVICE_MAX))
+        return -1;
+
+    reg.reg_data = readl(&i2s[device_num]->ccr);
+    return reg.ccr.dma_divide_16;
+}
+
+/* Receive buf_len stereo frames from one channel by polling.
+ *
+ * Each stored element packs the left word in bits 63..32 and the right word
+ * in bits 31..0. The function busy-waits on ISR.rxda, so it does not return
+ * until buf_len frames have arrived.
+ *
+ * Returns 0 on success, -1 if device_num or channel_num is out of range or
+ * buf is NULL while buf_len is non-zero. */
+int i2s_receive_data(i2s_device_number_t device_num, i2s_channel_num_t channel_num, uint64_t *buf, size_t buf_len)
+{
+    if(device_num >= I2S_DEVICE_MAX)
+        return -1;
+    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
+        return -1;
+    if(!buf && buf_len)
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+    isr_t u_isr;
+    /* size_t index: a 32-bit counter could wrap before reaching a large buf_len. */
+    size_t i = 0;
+
+    readl(&dev->channel[channel_num].ror);
+    /*clear over run*/
+
+    while(i < buf_len)
+    {
+        u_isr.reg_data = readl(&dev->channel[channel_num].isr);
+        if(u_isr.isr.rxda == 1)
+        {
+            /* Two separate statements keep the left-then-right read order. */
+            uint64_t left = readl(&dev->channel[channel_num].left_rxtx);
+            uint64_t right = readl(&dev->channel[channel_num].right_rxtx);
+
+            buf[i++] = (left << 32) | right;
+        }
+    }
+    return 0;
+}
+
+/* Start a DMA transfer of buf_len 32-bit words from the device's RXDMA
+ * register into buf. Waits for the DMA channel to finish its previous
+ * transfer first; does not wait for the new transfer to complete. */
+void i2s_receive_data_dma(i2s_device_number_t device_num, uint32_t *buf,
+                          size_t buf_len, dmac_channel_number_t channel_num)
+{
+    /* Source is a fixed register address, destination walks through buf. */
+    void *const rx_fifo = (void *)(&i2s[device_num]->rxdma);
+
+    dmac_wait_done(channel_num);
+    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_I2S0_RX_REQ + device_num * 2);
+    dmac_set_single_mode(channel_num, rx_fifo, buf,
+                         DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
+}
+
+/* Start a DMA transfer of buf_len 32-bit words straight from the RXDMA
+ * register of device_src_num to the TXDMA register of device_dest_num.
+ *
+ * On the first call for a given DMA channel the transfer is started without
+ * waiting; on later calls the previous transfer on that channel is awaited
+ * first.
+ *
+ * Returns 0 on success, -1 if either device number or the DMA channel number
+ * is out of range. */
+int i2s_rx_to_tx(i2s_device_number_t device_src_num, i2s_device_number_t device_dest_num,
+                 size_t buf_len, dmac_channel_number_t channel_num)
+{
+    /* Per-channel "already used once" flags. */
+    static uint8_t dmac_recv_flag[6] = {0, 0, 0, 0, 0, 0};
+
+    if(device_src_num >= I2S_DEVICE_MAX || device_dest_num >= I2S_DEVICE_MAX)
+        return -1;
+    /* channel_num indexes the flag table, so it must stay inside it. */
+    if((size_t)channel_num >= sizeof(dmac_recv_flag) / sizeof(dmac_recv_flag[0]))
+        return -1;
+
+    if(dmac_recv_flag[channel_num])
+        dmac_wait_done(channel_num);
+    else
+        dmac_recv_flag[channel_num] = 1;
+    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_I2S0_RX_REQ + device_src_num * 2);
+    /* Both ends are fixed register addresses, so neither address increments. */
+    dmac_set_single_mode(channel_num, (void *)(&i2s[device_src_num]->rxdma), (void *)(&i2s[device_dest_num]->txdma), DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
+                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
+    return 0;
+}
+
+/* Send interleaved left/right PCM samples to one channel by polling.
+ *
+ * pcm           - interleaved sample data (left, right, left, right, ...)
+ * buf_len       - size of pcm in bytes
+ * single_length - bits per sample: 16, 24 and 32 are decoded as little-endian
+ *                 words of that size; any other value is read one byte per sample
+ *
+ * The function busy-waits on ISR.txfe before writing each frame.
+ *
+ * Returns 0 on success, -1 if device_num or channel_num is out of range,
+ * single_length is below 8 (it is used as a divisor), or pcm is NULL while
+ * buf_len is non-zero. */
+int i2s_send_data(i2s_device_number_t device_num, i2s_channel_num_t channel_num, const uint8_t *pcm, size_t buf_len,
+                  size_t single_length)
+{
+    isr_t u_isr;
+    uint32_t left_buffer = 0;
+    uint32_t right_buffer = 0;
+    size_t i = 0;
+    size_t j = 0;
+
+    if(device_num >= I2S_DEVICE_MAX)
+        return -1;
+    if(channel_num < I2S_CHANNEL_0 || channel_num > I2S_CHANNEL_3)
+        return -1;
+    /* single_length / 8 is a divisor below; fewer than 8 bits would divide by zero. */
+    if(single_length < 8)
+        return -1;
+    if(!pcm && buf_len)
+        return -1;
+
+    volatile i2s_t *const dev = i2s[device_num];
+
+    buf_len = buf_len / (single_length / 8) / 2; /* sample num */
+    readl(&dev->channel[channel_num].tor);
+    /* read clear overrun flag */
+
+    while(j < buf_len)
+    {
+        u_isr.reg_data = readl(&dev->channel[channel_num].isr);
+        if(u_isr.isr.txfe != 1)
+            continue;
+
+        switch(single_length)
+        {
+            case 16:
+                left_buffer = ((const uint16_t *)pcm)[i++];
+                right_buffer = ((const uint16_t *)pcm)[i++];
+                break;
+            case 24:
+                /* Three bytes per sample, least significant byte first. */
+                left_buffer = 0;
+                left_buffer |= pcm[i++];
+                left_buffer |= pcm[i++] << 8;
+                left_buffer |= pcm[i++] << 16;
+                right_buffer = 0;
+                right_buffer |= pcm[i++];
+                right_buffer |= pcm[i++] << 8;
+                right_buffer |= pcm[i++] << 16;
+                break;
+            case 32:
+                left_buffer = ((const uint32_t *)pcm)[i++];
+                right_buffer = ((const uint32_t *)pcm)[i++];
+                break;
+            default:
+                left_buffer = pcm[i++];
+                right_buffer = pcm[i++];
+                break;
+        }
+        writel(left_buffer, &dev->channel[channel_num].left_rxtx);
+        writel(right_buffer, &dev->channel[channel_num].right_rxtx);
+        j++;
+    }
+    return 0;
+}
+
+/* Start a DMA transfer of buf_len 32-bit words from buf into the device's
+ * TXDMA register. Waits for the DMA channel to finish its previous transfer
+ * first; does not wait for the new transfer to complete. */
+void i2s_send_data_dma(i2s_device_number_t device_num, const void *buf, size_t buf_len, dmac_channel_number_t channel_num)
+{
+    /* Source walks through buf, destination is a fixed register address. */
+    void *const tx_fifo = (void *)(&i2s[device_num]->txdma);
+
+    dmac_wait_done(channel_num);
+    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_I2S0_TX_REQ + device_num * 2);
+    dmac_set_single_mode(channel_num, buf, tx_fifo,
+                         DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
+                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
+}
+
+/* Expand `length` PCM samples from pcm into pairs of 32-bit words in buf.
+ *
+ * Every sample produces two output words, so *send_len is set to length * 2.
+ *  - 16 bit: word 0 is the sample, word 1 is 0.
+ *  - 24 bit: word 0 is built from three bytes (least significant first);
+ *            word 1 is built the same way when track_num == 2, else 0.
+ *  - 32 bit and any other width: word 0 is the 32-bit sample, word 1 is 0.
+ * device_num is not used. */
+static void i2s_parse_voice(i2s_device_number_t device_num, uint32_t *buf, const uint8_t *pcm, size_t length, size_t bits_per_sample,
+                            uint8_t track_num, size_t *send_len)
+{
+    uint32_t n;
+    uint32_t byte_pos = 0;
+
+    *send_len = length * 2;
+
+    if(bits_per_sample == 16)
+    {
+        for(n = 0; n < length; n++)
+        {
+            buf[2 * n] = ((uint16_t *)pcm)[n];
+            buf[2 * n + 1] = 0;
+        }
+        return;
+    }
+
+    if(bits_per_sample == 24)
+    {
+        for(n = 0; n < length; n++)
+        {
+            uint32_t first = 0;
+            uint32_t second = 0;
+
+            first |= pcm[byte_pos++];
+            first |= pcm[byte_pos++] << 8;
+            first |= pcm[byte_pos++] << 16;
+            if(track_num == 2)
+            {
+                second |= pcm[byte_pos++];
+                second |= pcm[byte_pos++] << 8;
+                second |= pcm[byte_pos++] << 16;
+            }
+            buf[2 * n] = first;
+            buf[2 * n + 1] = second;
+        }
+        return;
+    }
+
+    /* 32-bit samples, and the fallback for every other width. */
+    for(n = 0; n < length; n++)
+    {
+        buf[2 * n] = ((uint32_t *)pcm)[n];
+        buf[2 * n + 1] = 0;
+    }
+}
+
+/* Play a PCM buffer through the device's TX DMA path, `frame` samples at a time.
+ *
+ * buf             - PCM data
+ * buf_len         - size of buf in bytes
+ * frame           - number of samples sent per DMA transfer
+ * bits_per_sample - sample width in bits
+ * track_num       - number of tracks in buf
+ *
+ * 16-bit and 32-bit two-track data is sent directly from buf (16-bit with
+ * dma_divide_16 enabled). Every other format is first expanded by
+ * i2s_parse_voice into a temporary double buffer, so one half can be
+ * converted while the other half is still being transferred.
+ *
+ * Returns without doing anything if frame, track_num or bits_per_sample / 8
+ * is zero (all three are divisors), or if the temporary buffer cannot be
+ * allocated. */
+void i2s_play(i2s_device_number_t device_num, dmac_channel_number_t channel_num,
+              const uint8_t *buf, size_t buf_len, size_t frame, size_t bits_per_sample, uint8_t track_num)
+{
+    const size_t bytes_per_sample = bits_per_sample / 8;
+
+    /* Guard the divisions below against a zero divisor. */
+    if(bytes_per_sample == 0 || track_num == 0 || frame == 0)
+        return;
+
+    const size_t sample_cnt = buf_len / bytes_per_sample / track_num;
+    const size_t frame_cnt = sample_cnt / frame;
+    const size_t frame_remain = sample_cnt % frame;
+    /* Bytes of input consumed by one full frame. */
+    const size_t frame_bytes = frame * bytes_per_sample * track_num;
+    const uint8_t *trans_buf;
+    size_t i;
+
+    i2s_set_dma_divide_16(device_num, 0);
+
+    if(bits_per_sample == 16 && track_num == 2)
+    {
+        i2s_set_dma_divide_16(device_num, 1);
+        for(i = 0; i < frame_cnt; i++)
+        {
+            trans_buf = buf + i * frame_bytes;
+            i2s_send_data_dma(device_num, trans_buf, frame, channel_num);
+        }
+        if(frame_remain)
+        {
+            trans_buf = buf + frame_cnt * frame_bytes;
+            i2s_send_data_dma(device_num, trans_buf, frame_remain, channel_num);
+        }
+    } else if(bits_per_sample == 32 && track_num == 2)
+    {
+        for(i = 0; i < frame_cnt; i++)
+        {
+            trans_buf = buf + i * frame_bytes;
+            i2s_send_data_dma(device_num, trans_buf, frame * 2, channel_num);
+        }
+        if(frame_remain)
+        {
+            trans_buf = buf + frame_cnt * frame_bytes;
+            i2s_send_data_dma(device_num, trans_buf, frame_remain * 2, channel_num);
+        }
+    } else
+    {
+        uint32_t *buff[2];
+        uint8_t flag = 0;
+        size_t send_len = 0;
+
+        /* Two halves of frame * 2 words each. */
+        buff[0] = malloc(frame * 2 * sizeof(uint32_t) * 2);
+        if(buff[0] == NULL)
+            return;
+        buff[1] = buff[0] + frame * 2;
+
+        for(i = 0; i < frame_cnt; i++)
+        {
+            trans_buf = buf + i * frame_bytes;
+            i2s_parse_voice(device_num, buff[flag], trans_buf, frame, bits_per_sample, track_num, &send_len);
+            i2s_send_data_dma(device_num, buff[flag], send_len, channel_num);
+            flag = !flag;
+        }
+        if(frame_remain)
+        {
+            trans_buf = buf + frame_cnt * frame_bytes;
+            i2s_parse_voice(device_num, buff[flag], trans_buf, frame_remain, bits_per_sample, track_num, &send_len);
+            /* Send the converted samples, not the raw input they were parsed from. */
+            i2s_send_data_dma(device_num, buff[flag], send_len, channel_num);
+        }
+        /* The last transfer may still be reading the buffer; wait before freeing it. */
+        if(sample_cnt)
+            dmac_wait_done(channel_num);
+        free(buff[0]);
+    }
+}
+
+/**
+ * @brief       Update the sign_expand_en field of an I2S device's CCR register
+ *
+ * The register is read, only the sign_expand_en field is replaced, and the
+ * whole word is written back.
+ *
+ * @param[in]   device_num      I2S device index into the i2s[] register table
+ * @param[in]   enable          Value stored into the sign_expand_en field
+ */
+static inline void i2s_set_sign_expand_en(i2s_device_number_t device_num, uint32_t enable)
+{
+    ccr_t ccr_shadow;
+
+    /* Read-modify-write so the other CCR fields keep their current values */
+    ccr_shadow.reg_data = readl(&i2s[device_num]->ccr);
+    ccr_shadow.ccr.sign_expand_en = enable;
+    writel(ccr_shadow.reg_data, &i2s[device_num]->ccr);
+}
+
+/**
+ * @brief       Reconfigure one channel of an I2S device for receiving
+ *
+ * The channel's receiver is switched off, FIFOs are flushed, the word length,
+ * clock configuration and FIFO threshold are applied, and the receiver is
+ * switched back on.
+ *
+ * @param[in]   device_num          I2S device index into the i2s[] register table
+ * @param[in]   channel_num         Channel of that device to configure
+ * @param[in]   word_length         Forwarded to i2s_set_rx_word_length()
+ * @param[in]   word_select_size    Forwarded to i2s_master_configure()
+ * @param[in]   trigger_level       Forwarded to i2s_set_rx_threshold()
+ * @param[in]   word_mode           Forwarded to i2s_master_configure()
+ */
+void i2s_rx_channel_config(i2s_device_number_t device_num,
+                           i2s_channel_num_t channel_num,
+                           i2s_word_length_t word_length,
+                           i2s_word_select_cycles_t word_select_size,
+                           i2s_fifo_threshold_t trigger_level,
+                           i2s_work_mode_t word_mode)
+{
+    i2s_recv_channel_enable(device_num, channel_num, 0);
+    /* Receive channel disable while it is being reconfigured */
+
+    writel(0, &i2s[device_num]->channel[channel_num].ter);
+    /* Write 0 to this channel's TER register (disable tx on the channel) */
+
+    writel(1, &i2s[device_num]->channel[channel_num].rff);
+    /* Flush this channel's individual FIFO (RFF register) */
+
+    writel(1, &i2s[device_num]->rxffr);
+    /* Flush via the device-level RXFFR register.
+     * NOTE(review): the previous comment said "tx fifo"; the register name
+     * suggests the rx FIFO - confirm against the datasheet. */
+
+    i2s_set_rx_word_length(device_num, word_length, channel_num);
+    /* Apply the caller-supplied word length */
+
+    i2s_master_configure(device_num,
+                         word_select_size, NO_CLOCK_GATING, word_mode);
+    /* Caller-supplied word select size and work mode, no clock gating */
+
+    i2s_set_rx_threshold(device_num, trigger_level, channel_num);
+    /* Apply the caller-supplied FIFO trigger level */
+
+    /* Both registers are read and the values discarded.
+     * NOTE(review): presumably a read-to-clear of the overrun flags - confirm. */
+    readl(&i2s[device_num]->channel[channel_num].ror);
+    readl(&i2s[device_num]->channel[channel_num].tor);
+
+    i2s_recv_channel_enable(device_num, channel_num, 1);
+}
+
+/**
+ * @brief       Reconfigure one channel of an I2S device for transmitting
+ *
+ * The channel's transmitter is switched off, FIFOs are flushed, the word
+ * length, clock configuration and FIFO threshold are applied, and the
+ * transmitter is switched back on.
+ *
+ * @param[in]   device_num          I2S device index into the i2s[] register table
+ * @param[in]   channel_num         Channel of that device to configure
+ * @param[in]   word_length         Forwarded to i2s_set_tx_word_length()
+ * @param[in]   word_select_size    Forwarded to i2s_master_configure()
+ * @param[in]   trigger_level       Forwarded to i2s_set_tx_threshold()
+ * @param[in]   word_mode           Forwarded to i2s_master_configure()
+ */
+void i2s_tx_channel_config(i2s_device_number_t device_num,
+                           i2s_channel_num_t channel_num,
+                           i2s_word_length_t word_length,
+                           i2s_word_select_cycles_t word_select_size,
+                           i2s_fifo_threshold_t trigger_level,
+                           i2s_work_mode_t word_mode)
+{
+    writel(0, &i2s[device_num]->channel[channel_num].rer);
+    /* Write 0 to this channel's RER register (disable rx on the channel) */
+
+    i2s_transmit_channel_enable(device_num, channel_num, 0);
+    /* Transmit channel disable while it is being reconfigured */
+
+    writel(1, &i2s[device_num]->txffr);
+    /* Flush tx fifo via the device-level TXFFR register */
+    writel(1, &i2s[device_num]->channel[channel_num].tff);
+    /* Flush this channel's individual FIFO (TFF register) */
+
+    i2s_set_tx_word_length(device_num, word_length, channel_num);
+    /* Apply the caller-supplied word length */
+
+    i2s_master_configure(device_num, word_select_size, NO_CLOCK_GATING, word_mode);
+    /* Caller-supplied word select size and work mode, no clock gating */
+
+    i2s_set_tx_threshold(device_num, trigger_level, channel_num);
+    /* Apply the caller-supplied FIFO trigger level */
+
+    i2s_transmit_channel_enable(device_num, channel_num, 1);
+}
+
+/**
+ * @brief       Bring up an I2S device as either a transmitter or a receiver
+ *
+ * Enables and resets the device clock domain, sets the clock threshold to 7,
+ * enables the device with both blocks disabled, then walks the four channels
+ * and finally enables DMA for the chosen direction.
+ *
+ * @param[in]   device_num      I2S device index (added to the SYSCTL I2S0 ids)
+ * @param[in]   rxtx_mode       I2S_TRANSMITTER selects the transmit path; any
+ *                              other value selects the receive path
+ * @param[in]   channel_mask    Two bits per channel, channel 0 in the lowest
+ *                              pair; a channel is enabled only when both of
+ *                              its bits are set
+ */
+void i2s_init(i2s_device_number_t device_num, i2s_transmit_t rxtx_mode, uint32_t channel_mask)
+{
+    const int as_transmitter = (rxtx_mode == I2S_TRANSMITTER);
+
+    sysctl_clock_enable(SYSCTL_CLOCK_I2S0 + device_num);
+    sysctl_reset(SYSCTL_RESET_I2S0 + device_num);
+    /* Threshold N per sample rate (original note): 96k:5, 44k:12, 24k:23, 22k:25, 16k:35.
+     * sample rate * 32 bit * 2 = 75 MHz / ((N + 1) * 2) */
+    sysctl_clock_set_threshold(SYSCTL_THRESHOLD_I2S0 + device_num, 7);
+    i2s_set_enable(device_num, 1);
+    i2s_disable_block(device_num, I2S_TRANSMITTER);
+    i2s_disable_block(device_num, I2S_RECEIVER);
+
+    /* Consume the mask two bits at a time, one pair per channel */
+    for(int ch = 0; ch < 4; ch++, channel_mask >>= 2)
+    {
+        const int selected = ((channel_mask & 0x3) == 0x3);
+
+        if(selected)
+        {
+            i2s_set_mask_interrupt(device_num, I2S_CHANNEL_0 + ch, 1, 1, 1, 1);
+        }
+
+        if(as_transmitter)
+        {
+            if(selected)
+            {
+                i2s_transimit_enable(device_num, I2S_CHANNEL_0 + ch);
+            } else
+            {
+                i2s_transmit_channel_enable(device_num, I2S_CHANNEL_0 + ch, 0);
+            }
+        } else
+        {
+            if(selected)
+            {
+                i2s_receive_enable(device_num, I2S_CHANNEL_0 + ch);
+            } else
+            {
+                i2s_recv_channel_enable(device_num, I2S_CHANNEL_0 + ch, 0);
+            }
+        }
+    }
+
+    if(as_transmitter)
+    {
+        i2s_transmit_dma_enable(device_num, 1);
+    } else
+    {
+        /* Set expand_en when receive */
+        i2s_set_sign_expand_en(device_num, 1);
+        i2s_receive_dma_enable(device_num, 1);
+    }
+}
+
+/**
+ * @brief       Program the I2S clock threshold for a requested sample rate
+ *
+ * The threshold is computed as
+ *     round(pll2_clock / (sample_rate * 2 * clk_word_size * 2) - 1)
+ * where clk_word_size is derived from the device's CCR register.
+ *
+ * A sample_rate of 0 leaves the threshold untouched (the division would not
+ * produce a finite value), and a negative result is clamped to 0 so that the
+ * floating-point value is always representable as uint32_t.
+ *
+ * @param[in]   device_num      I2S device index (added to the SYSCTL I2S0 ids)
+ * @param[in]   sample_rate     Requested sample rate
+ *
+ * @return      Value reported by sysctl_clock_get_freq() for the device clock
+ *              after the update
+ */
+uint32_t i2s_set_sample_rate(i2s_device_number_t device_num, uint32_t sample_rate)
+{
+    ccr_t u_ccr;
+    uint32_t pll2_clock = sysctl_pll_get_freq(SYSCTL_PLL2);
+
+    u_ccr.reg_data = readl(&i2s[device_num]->ccr);
+    /* 0x0 for 16sclk cycles, 0x1 for 24 sclk cycles 0x2 for 32 sclk */
+    uint32_t v_clk_word_size = (u_ccr.ccr.clk_word_size + 2) * 8;
+
+    if(sample_rate != 0)
+    {
+        double divider = round(pll2_clock / (sample_rate * 2.0 * v_clk_word_size * 2.0) - 1);
+        uint32_t threshold = 0;
+
+        /* Converting a negative double to an unsigned type is undefined, so clamp at 0 */
+        if(divider > 0)
+        {
+            threshold = (uint32_t)divider;
+        }
+        sysctl_clock_set_threshold(SYSCTL_THRESHOLD_I2S0 + device_num, threshold);
+    }
+    return sysctl_clock_get_freq(SYSCTL_CLOCK_I2S0 + device_num);
+}
+
+/**
+ * @brief       DMA interrupt handler registered by i2s_handle_data_dma()
+ *
+ * Unregisters the interrupt for the instance's DMA channel and then invokes
+ * the stored user callback, if there is one, with the stored user context.
+ *
+ * @param[in]   ctx     Pointer to the i2s_instance_t that was registered
+ *
+ * @return      Always 0; the user callback's return value is discarded
+ */
+int i2s_dmac_irq(void *ctx)
+{
+    i2s_instance_t *instance = (i2s_instance_t *)ctx;
+
+    dmac_irq_unregister(instance->dmac_channel);
+
+    if(!instance->i2s_int_instance.callback)
+    {
+        return 0;
+    }
+    instance->i2s_int_instance.callback(instance->i2s_int_instance.ctx);
+    return 0;
+}
+
+/**
+ * @brief       Start an I2S send or receive transfer through the DMA controller
+ *
+ * For either direction the sequence is: optionally wait for the DMA channel
+ * (skipped when data.nowait_dma_idle is set), optionally register a completion
+ * interrupt, route the I2S DMA request to the channel, start a single-mode
+ * transfer with 32-bit width, and optionally wait for completion.
+ *
+ * @param[in]   device_num      I2S device index; asserted to be below I2S_DEVICE_MAX
+ * @param[in]   data            Transfer description. transfer_mode == I2S_SEND uses
+ *                              tx_channel/tx_buf/tx_len, any other mode uses
+ *                              rx_channel/rx_buf/rx_len
+ * @param[in]   cb              Optional completion callback. When non-null its
+ *                              callback/ctx/priority are registered on the DMA
+ *                              channel; when null and data.wait_dma_done is set
+ *                              the function waits for the transfer itself
+ */
+void i2s_handle_data_dma(i2s_device_number_t device_num, i2s_data_t data, plic_interrupt_t *cb)
+{
+    configASSERT(device_num < I2S_DEVICE_MAX);
+
+    if(data.transfer_mode != I2S_SEND)
+    {
+        /* Receive path: I2S rxdma register -> memory */
+        configASSERT(data.rx_buf && data.rx_len);
+        if(!data.nowait_dma_idle)
+        {
+            dmac_wait_done(data.rx_channel);
+        }
+        if(cb)
+        {
+            /* Remember the user callback before the transfer is started */
+            g_i2s_recv_instance[device_num].i2s_int_instance.callback = cb->callback;
+            g_i2s_recv_instance[device_num].i2s_int_instance.ctx = cb->ctx;
+            g_i2s_recv_instance[device_num].dmac_channel = data.rx_channel;
+            g_i2s_recv_instance[device_num].transfer_mode = I2S_RECEIVE;
+            dmac_irq_register(data.rx_channel, i2s_dmac_irq, &g_i2s_recv_instance[device_num], cb->priority);
+        }
+        sysctl_dma_select((sysctl_dma_channel_t)data.rx_channel, SYSCTL_DMA_SELECT_I2S0_RX_REQ + device_num * 2);
+        dmac_set_single_mode(data.rx_channel, (void *)(&i2s[device_num]->rxdma), data.rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+                             DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.rx_len);
+        if(data.wait_dma_done && !cb)
+        {
+            dmac_wait_done(data.rx_channel);
+        }
+        return;
+    }
+
+    /* Send path: memory -> I2S txdma register */
+    configASSERT(data.tx_buf && data.tx_len);
+    if(!data.nowait_dma_idle)
+    {
+        dmac_wait_done(data.tx_channel);
+    }
+    if(cb)
+    {
+        /* Remember the user callback before the transfer is started */
+        g_i2s_send_instance[device_num].i2s_int_instance.callback = cb->callback;
+        g_i2s_send_instance[device_num].i2s_int_instance.ctx = cb->ctx;
+        g_i2s_send_instance[device_num].dmac_channel = data.tx_channel;
+        g_i2s_send_instance[device_num].transfer_mode = I2S_SEND;
+        dmac_irq_register(data.tx_channel, i2s_dmac_irq, &g_i2s_send_instance[device_num], cb->priority);
+    }
+    sysctl_dma_select((sysctl_dma_channel_t)data.tx_channel, SYSCTL_DMA_SELECT_I2S0_TX_REQ + device_num * 2);
+    dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&i2s[device_num]->txdma), DMAC_ADDR_INCREMENT,
+                         DMAC_ADDR_NOCHANGE, DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.tx_len);
+    if(data.wait_dma_done && !cb)
+    {
+        dmac_wait_done(data.tx_channel);
+    }
+}

+ 496 - 496
lib/drivers/include/spi.h

@@ -1,496 +1,496 @@
-/* Copyright 2018 Canaan Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef _DRIVER_SPI_H
-#define _DRIVER_SPI_H
-
-#include <stddef.h>
-#include <stdint.h>
-#include "dmac.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* clang-format off */
-typedef struct _spi
-{
-    /* SPI Control Register 0                                    (0x00)*/
-    volatile uint32_t ctrlr0;
-    /* SPI Control Register 1                                    (0x04)*/
-    volatile uint32_t ctrlr1;
-    /* SPI Enable Register                                       (0x08)*/
-    volatile uint32_t ssienr;
-    /* SPI Microwire Control Register                            (0x0c)*/
-    volatile uint32_t mwcr;
-    /* SPI Slave Enable Register                                 (0x10)*/
-    volatile uint32_t ser;
-    /* SPI Baud Rate Select                                      (0x14)*/
-    volatile uint32_t baudr;
-    /* SPI Transmit FIFO Threshold Level                         (0x18)*/
-    volatile uint32_t txftlr;
-    /* SPI Receive FIFO Threshold Level                          (0x1c)*/
-    volatile uint32_t rxftlr;
-    /* SPI Transmit FIFO Level Register                          (0x20)*/
-    volatile uint32_t txflr;
-    /* SPI Receive FIFO Level Register                           (0x24)*/
-    volatile uint32_t rxflr;
-    /* SPI Status Register                                       (0x28)*/
-    volatile uint32_t sr;
-    /* SPI Interrupt Mask Register                               (0x2c)*/
-    volatile uint32_t imr;
-    /* SPI Interrupt Status Register                             (0x30)*/
-    volatile uint32_t isr;
-    /* SPI Raw Interrupt Status Register                         (0x34)*/
-    volatile uint32_t risr;
-    /* SPI Transmit FIFO Overflow Interrupt Clear Register       (0x38)*/
-    volatile uint32_t txoicr;
-    /* SPI Receive FIFO Overflow Interrupt Clear Register        (0x3c)*/
-    volatile uint32_t rxoicr;
-    /* SPI Receive FIFO Underflow Interrupt Clear Register       (0x40)*/
-    volatile uint32_t rxuicr;
-    /* SPI Multi-Master Interrupt Clear Register                 (0x44)*/
-    volatile uint32_t msticr;
-    /* SPI Interrupt Clear Register                              (0x48)*/
-    volatile uint32_t icr;
-    /* SPI DMA Control Register                                  (0x4c)*/
-    volatile uint32_t dmacr;
-    /* SPI DMA Transmit Data Level                               (0x50)*/
-    volatile uint32_t dmatdlr;
-    /* SPI DMA Receive Data Level                                (0x54)*/
-    volatile uint32_t dmardlr;
-    /* SPI Identification Register                               (0x58)*/
-    volatile uint32_t idr;
-    /* SPI DWC_ssi component version                             (0x5c)*/
-    volatile uint32_t ssic_version_id;
-    /* SPI Data Register 0-36                                    (0x60 -- 0xec)*/
-    volatile uint32_t dr[36];
-    /* SPI RX Sample Delay Register                              (0xf0)*/
-    volatile uint32_t rx_sample_delay;
-    /* SPI SPI Control Register                                  (0xf4)*/
-    volatile uint32_t spi_ctrlr0;
-    /* reserved                                                  (0xf8)*/
-    volatile uint32_t resv;
-    /* SPI XIP Mode bits                                         (0xfc)*/
-    volatile uint32_t xip_mode_bits;
-    /* SPI XIP INCR transfer opcode                              (0x100)*/
-    volatile uint32_t xip_incr_inst;
-    /* SPI XIP WRAP transfer opcode                              (0x104)*/
-    volatile uint32_t xip_wrap_inst;
-    /* SPI XIP Control Register                                  (0x108)*/
-    volatile uint32_t xip_ctrl;
-    /* SPI XIP Slave Enable Register                             (0x10c)*/
-    volatile uint32_t xip_ser;
-    /* SPI XIP Receive FIFO Overflow Interrupt Clear Register    (0x110)*/
-    volatile uint32_t xrxoicr;
-    /* SPI XIP time out register for continuous transfers        (0x114)*/
-    volatile uint32_t xip_cnt_time_out;
-    volatile uint32_t endian;
-} __attribute__((packed, aligned(4))) spi_t;
-/* clang-format on */
-
-typedef enum _spi_device_num
-{
-    SPI_DEVICE_0,
-    SPI_DEVICE_1,
-    SPI_DEVICE_2,
-    SPI_DEVICE_3,
-    SPI_DEVICE_MAX,
-} spi_device_num_t;
-
-typedef enum _spi_work_mode
-{
-    SPI_WORK_MODE_0,
-    SPI_WORK_MODE_1,
-    SPI_WORK_MODE_2,
-    SPI_WORK_MODE_3,
-} spi_work_mode_t;
-
-typedef enum _spi_frame_format
-{
-    SPI_FF_STANDARD,
-    SPI_FF_DUAL,
-    SPI_FF_QUAD,
-    SPI_FF_OCTAL
-} spi_frame_format_t;
-
-typedef enum _spi_instruction_address_trans_mode
-{
-    SPI_AITM_STANDARD,
-    SPI_AITM_ADDR_STANDARD,
-    SPI_AITM_AS_FRAME_FORMAT
-} spi_instruction_address_trans_mode_t;
-
-typedef enum _spi_transfer_mode
-{
-    SPI_TMOD_TRANS_RECV,
-    SPI_TMOD_TRANS,
-    SPI_TMOD_RECV,
-    SPI_TMOD_EEROM
-} spi_transfer_mode_t;
-
-typedef enum _spi_transfer_width
-{
-    SPI_TRANS_CHAR = 0x1,
-    SPI_TRANS_SHORT = 0x2,
-    SPI_TRANS_INT = 0x4,
-} spi_transfer_width_t;
-
-typedef enum _spi_chip_select
-{
-    SPI_CHIP_SELECT_0,
-    SPI_CHIP_SELECT_1,
-    SPI_CHIP_SELECT_2,
-    SPI_CHIP_SELECT_3,
-    SPI_CHIP_SELECT_MAX,
-} spi_chip_select_t;
-
-typedef enum
-{
-    WRITE_CONFIG,
-    READ_CONFIG,
-    WRITE_DATA_BYTE,
-    READ_DATA_BYTE,
-    WRITE_DATA_BLOCK,
-    READ_DATA_BLOCK,
-} spi_slave_command_e;
-
-typedef struct
-{
-    uint8_t cmd;
-    uint8_t err;
-    uint32_t addr;
-    uint32_t len;
-} spi_slave_command_t;
-
-typedef enum
-{
-    IDLE,
-    COMMAND,
-    TRANSFER,
-} spi_slave_status_e;
-
-typedef int (*spi_slave_receive_callback_t)(void *ctx);
-
-typedef struct _spi_slave_instance
-{
-    uint8_t int_pin;
-    uint8_t ready_pin;
-    dmac_channel_number_t dmac_channel;
-    uint8_t dfs;
-    uint8_t slv_oe;
-    uint8_t work_mode;
-    size_t data_bit_length;
-    volatile spi_slave_status_e status;
-    volatile spi_slave_command_t command;
-    volatile uint8_t *config_ptr;
-    uint32_t config_len;
-    spi_slave_receive_callback_t callback;
-    uint8_t is_dual;
-    uint8_t mosi_pin;
-    uint8_t miso_pin;
-} spi_slave_instance_t;
-
-typedef struct _spi_data_t
-{
-    dmac_channel_number_t tx_channel;
-    dmac_channel_number_t rx_channel;
-    uint32_t *tx_buf;
-    size_t tx_len;
-    uint32_t *rx_buf;
-    size_t rx_len;
-    spi_transfer_mode_t transfer_mode;
-    bool fill_mode;
-} spi_data_t;
-
-extern volatile spi_t *const spi[4];
-
-/**
- * @brief       Set spi configuration
- *
- * @param[in]   spi_num             Spi bus number
- * @param[in]   mode                Spi mode
- * @param[in]   frame_format        Spi frame format
- * @param[in]   data_bit_length     Spi data bit length
- * @param[in]   endian              0:little-endian 1:big-endian
- *
- * @return      Void
- */
-void spi_init(spi_device_num_t spi_num, spi_work_mode_t work_mode, spi_frame_format_t frame_format,
-              size_t data_bit_length, uint32_t endian);
-
-/**
- * @brief       Set multiline configuration
- *
- * @param[in]   spi_num                                 Spi bus number
- * @param[in]   instruction_length                      Instruction length
- * @param[in]   address_length                          Address length
- * @param[in]   wait_cycles                             Wait cycles
- * @param[in]   instruction_address_trans_mode          Spi transfer mode
- *
- */
-void spi_init_non_standard(spi_device_num_t spi_num, uint32_t instruction_length, uint32_t address_length,
-                           uint32_t wait_cycles, spi_instruction_address_trans_mode_t instruction_address_trans_mode);
-
-/**
- * @brief       Spi send data
- *
- * @param[in]   spi_num         Spi bus number
- * @param[in]   chip_select     Spi chip select
- * @param[in]   cmd_buff        Spi command buffer point
- * @param[in]   cmd_len         Spi command length
- * @param[in]   tx_buff         Spi transmit buffer point
- * @param[in]   tx_len          Spi transmit buffer length
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_send_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
-                            size_t cmd_len, const uint8_t *tx_buff, size_t tx_len);
-
-/**
- * @brief       Spi receive data
- *
- * @param[in]   spi_num             Spi bus number
- * @param[in]   chip_select         Spi chip select
- * @param[in]   cmd_buff            Spi command buffer point
- * @param[in]   cmd_len             Spi command length
- * @param[in]   rx_buff             Spi receive buffer point
- * @param[in]   rx_len              Spi receive buffer length
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_receive_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
-                               size_t cmd_len, uint8_t *rx_buff, size_t rx_len);
-
-/**
- * @brief       Spi special receive data
- *
- * @param[in]   spi_num         Spi bus number
- * @param[in]   chip_select     Spi chip select
- * @param[in]   cmd_buff        Spi command buffer point
- * @param[in]   cmd_len         Spi command length
- * @param[in]   rx_buff         Spi receive buffer point
- * @param[in]   rx_len          Spi receive buffer length
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_receive_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
-                               size_t cmd_len, uint8_t *rx_buff, size_t rx_len);
-
-/**
- * @brief       Spi special send data
- *
- * @param[in]   spi_num         Spi bus number
- * @param[in]   chip_select     Spi chip select
- * @param[in]   cmd_buff        Spi command buffer point
- * @param[in]   cmd_len         Spi command length
- * @param[in]   tx_buff         Spi transmit buffer point
- * @param[in]   tx_len          Spi transmit buffer length
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_send_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
-                            size_t cmd_len, const uint8_t *tx_buff, size_t tx_len);
-
-/**
- * @brief       Spi send data by dma
- *
- * @param[in]   channel_num     Dmac channel number
- * @param[in]   spi_num         Spi bus number
- * @param[in]   chip_select     Spi chip select
- * @param[in]   cmd_buff        Spi command buffer point
- * @param[in]   cmd_len         Spi command length
- * @param[in]   tx_buff         Spi transmit buffer point
- * @param[in]   tx_len          Spi transmit buffer length
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_send_data_standard_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
-                                spi_chip_select_t chip_select,
-                                const uint8_t *cmd_buff, size_t cmd_len, const uint8_t *tx_buff, size_t tx_len);
-
-/**
- * @brief       Spi receive data by dma
- *
- * @param[in]   w_channel_num       Dmac write channel number
- * @param[in]   r_channel_num       Dmac read channel number
- * @param[in]   spi_num             Spi bus number
- * @param[in]   chip_select         Spi chip select
- * @param[in]   cmd_buff            Spi command buffer point
- * @param[in]   cmd_len             Spi command length
- * @param[in]   rx_buff             Spi receive buffer point
- * @param[in]   rx_len              Spi receive buffer length
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
-                                   dmac_channel_number_t dma_receive_channel_num,
-                                   spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
-                                   size_t cmd_len, uint8_t *rx_buff, size_t rx_len);
-
-/**
- * @brief       Spi special send data by dma
- *
- * @param[in]   channel_num     Dmac channel number
- * @param[in]   spi_num         Spi bus number
- * @param[in]   chip_select     Spi chip select
- * @param[in]   cmd_buff        Spi command buffer point
- * @param[in]   cmd_len         Spi command length
- * @param[in]   tx_buff         Spi transmit buffer point
- * @param[in]   tx_len          Spi transmit buffer length
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
-                                spi_chip_select_t chip_select,
-                                const uint32_t *cmd_buff, size_t cmd_len, const uint8_t *tx_buff, size_t tx_len);
-
-/**
- * @brief       Spi special receive data by dma
- *
- * @param[in]   dma_send_channel_num        Dmac write channel number
- * @param[in]   dma_receive_channel_num     Dmac read channel number
- * @param[in]   spi_num                     Spi bus number
- * @param[in]   chip_select                 Spi chip select
- * @param[in]   cmd_buff                    Spi command buffer point
- * @param[in]   cmd_len                     Spi command length
- * @param[in]   rx_buff                     Spi receive buffer point
- * @param[in]   rx_len                      Spi receive buffer length
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_receive_data_multiple_dma(dmac_channel_number_t dma_send_channel_num,
-                                   dmac_channel_number_t dma_receive_channel_num,
-                                   spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
-                                   size_t cmd_len, uint8_t *rx_buff, size_t rx_len);
-
-/**
- * @brief       Spi fill dma
- *
- * @param[in]   channel_num     Dmac channel number
- * @param[in]   spi_num         Spi bus number
- * @param[in]   chip_select     Spi chip select
- * @param[in]   tx_buff        Spi command buffer point
- * @param[in]   tx_len         Spi command length
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_fill_data_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num, spi_chip_select_t chip_select,
-                       const uint32_t *tx_buff, size_t tx_len);
-
-/**
- * @brief       Spi normal send by dma
- *
- * @param[in]   channel_num     Dmac channel number
- * @param[in]   spi_num         Spi bus number
- * @param[in]   chip_select     Spi chip select
- * @param[in]   tx_buff         Spi transmit buffer point
- * @param[in]   tx_len          Spi transmit buffer length
- * @param[in]   stw             Spi transfer width
- *
- * @return      Result
- *     - 0      Success
- *     - Other  Fail
- */
-void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
-                              spi_chip_select_t chip_select,
-                              const void *tx_buff, size_t tx_len, spi_transfer_width_t spi_transfer_width);
-
-/**
- * @brief       Spi normal send by dma
- *
- * @param[in]   spi_num         Spi bus number
- * @param[in]   spi_clk         Spi clock rate
- *
- * @return      The real spi clock rate
- */
-uint32_t spi_set_clk_rate(spi_device_num_t spi_num, uint32_t spi_clk);
-
-/**
- * @brief       Spi full duplex send receive data by dma
- *
- * @param[in]   dma_send_channel_num          Dmac write channel number
- * @param[in]   dma_receive_channel_num       Dmac read channel number
- * @param[in]   spi_num                       Spi bus number
- * @param[in]   chip_select                   Spi chip select
- * @param[in]   tx_buf                        Spi send buffer
- * @param[in]   tx_len                        Spi send buffer length
- * @param[in]   rx_buf                        Spi receive buffer
- * @param[in]   rx_len                        Spi receive buffer length
- *
- */
-void spi_dup_send_receive_data_dma(dmac_channel_number_t dma_send_channel_num,
-                                   dmac_channel_number_t dma_receive_channel_num,
-                                   spi_device_num_t spi_num, spi_chip_select_t chip_select,
-                                   const uint8_t *tx_buf, size_t tx_len, uint8_t *rx_buf, size_t rx_len);
-
-/**
- * @brief       Set spi slave configuration
- *
- * @param[in]   int_pin             SPI master starts sending data interrupt.
- * @param[in]   ready_pin           SPI slave ready.
- * @param[in]   dmac_channel        Dmac channel number for block.
- * @param[in]   data_bit_length     Spi data bit length
- * @param[in]   data                SPI slave device data buffer.
- * @param[in]   len                 The length of SPI slave device data buffer.
- * @param[in]   callback            Callback of spi slave.
- *
- * @return      Void
- */
-void spi_slave_config(uint8_t int_pin, uint8_t ready_pin, dmac_channel_number_t dmac_channel, size_t data_bit_length, uint8_t *data, uint32_t len, spi_slave_receive_callback_t callback);
-
-void spi_slave_dual_config(uint8_t int_pin,
-                           uint8_t ready_pin,
-                           uint8_t mosi_pin,
-                           uint8_t miso_pin,
-                           dmac_channel_number_t dmac_channel,
-                           size_t data_bit_length,
-                           uint8_t *data,
-                           uint32_t len,
-                           spi_slave_receive_callback_t callback);
-
-/**
- * @brief       Spi handle transfer data operations
- *
- * @param[in]   spi_num         Spi bus number
- * @param[in]   chip_select     Spi chip select
- * @param[in]   data            Spi transfer data information
- * @param[in]   cb              Spi DMA callback
- *
- */
-void spi_handle_data_dma(spi_device_num_t spi_num, spi_chip_select_t chip_select, spi_data_t data, plic_interrupt_t *cb);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _DRIVER_SPI_H */
+/* Copyright 2018 Canaan Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef _DRIVER_SPI_H
+#define _DRIVER_SPI_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "dmac.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* clang-format off */
+typedef struct _spi
+{
+    /* SPI Control Register 0                                    (0x00)*/
+    volatile uint32_t ctrlr0;
+    /* SPI Control Register 1                                    (0x04)*/
+    volatile uint32_t ctrlr1;
+    /* SPI Enable Register                                       (0x08)*/
+    volatile uint32_t ssienr;
+    /* SPI Microwire Control Register                            (0x0c)*/
+    volatile uint32_t mwcr;
+    /* SPI Slave Enable Register                                 (0x10)*/
+    volatile uint32_t ser;
+    /* SPI Baud Rate Select                                      (0x14)*/
+    volatile uint32_t baudr;
+    /* SPI Transmit FIFO Threshold Level                         (0x18)*/
+    volatile uint32_t txftlr;
+    /* SPI Receive FIFO Threshold Level                          (0x1c)*/
+    volatile uint32_t rxftlr;
+    /* SPI Transmit FIFO Level Register                          (0x20)*/
+    volatile uint32_t txflr;
+    /* SPI Receive FIFO Level Register                           (0x24)*/
+    volatile uint32_t rxflr;
+    /* SPI Status Register                                       (0x28)*/
+    volatile uint32_t sr;
+    /* SPI Interrupt Mask Register                               (0x2c)*/
+    volatile uint32_t imr;
+    /* SPI Interrupt Status Register                             (0x30)*/
+    volatile uint32_t isr;
+    /* SPI Raw Interrupt Status Register                         (0x34)*/
+    volatile uint32_t risr;
+    /* SPI Transmit FIFO Overflow Interrupt Clear Register       (0x38)*/
+    volatile uint32_t txoicr;
+    /* SPI Receive FIFO Overflow Interrupt Clear Register        (0x3c)*/
+    volatile uint32_t rxoicr;
+    /* SPI Receive FIFO Underflow Interrupt Clear Register       (0x40)*/
+    volatile uint32_t rxuicr;
+    /* SPI Multi-Master Interrupt Clear Register                 (0x44)*/
+    volatile uint32_t msticr;
+    /* SPI Interrupt Clear Register                              (0x48)*/
+    volatile uint32_t icr;
+    /* SPI DMA Control Register                                  (0x4c)*/
+    volatile uint32_t dmacr;
+    /* SPI DMA Transmit Data Level                               (0x50)*/
+    volatile uint32_t dmatdlr;
+    /* SPI DMA Receive Data Level                                (0x54)*/
+    volatile uint32_t dmardlr;
+    /* SPI Identification Register                               (0x58)*/
+    volatile uint32_t idr;
+    /* SPI DWC_ssi component version                             (0x5c)*/
+    volatile uint32_t ssic_version_id;
+    /* SPI Data Register 0-35                                    (0x60 -- 0xec)*/
+    volatile uint32_t dr[36];
+    /* SPI RX Sample Delay Register                              (0xf0)*/
+    volatile uint32_t rx_sample_delay;
+    /* SPI SPI Control Register                                  (0xf4)*/
+    volatile uint32_t spi_ctrlr0;
+    /* reserved                                                  (0xf8)*/
+    volatile uint32_t resv;
+    /* SPI XIP Mode bits                                         (0xfc)*/
+    volatile uint32_t xip_mode_bits;
+    /* SPI XIP INCR transfer opcode                              (0x100)*/
+    volatile uint32_t xip_incr_inst;
+    /* SPI XIP WRAP transfer opcode                              (0x104)*/
+    volatile uint32_t xip_wrap_inst;
+    /* SPI XIP Control Register                                  (0x108)*/
+    volatile uint32_t xip_ctrl;
+    /* SPI XIP Slave Enable Register                             (0x10c)*/
+    volatile uint32_t xip_ser;
+    /* SPI XIP Receive FIFO Overflow Interrupt Clear Register    (0x110)*/
+    volatile uint32_t xrxoicr;
+    /* SPI XIP time out register for continuous transfers        (0x114)*/
+    volatile uint32_t xip_cnt_time_out;
+    volatile uint32_t endian;
+} __attribute__((packed, aligned(4))) spi_t;
+/* clang-format on */
+
+typedef enum _spi_device_num
+{
+    SPI_DEVICE_0,
+    SPI_DEVICE_1,
+    SPI_DEVICE_2,
+    SPI_DEVICE_3,
+    SPI_DEVICE_MAX,
+} spi_device_num_t;
+
+typedef enum _spi_work_mode
+{
+    SPI_WORK_MODE_0,
+    SPI_WORK_MODE_1,
+    SPI_WORK_MODE_2,
+    SPI_WORK_MODE_3,
+} spi_work_mode_t;
+
+typedef enum _spi_frame_format
+{
+    SPI_FF_STANDARD,
+    SPI_FF_DUAL,
+    SPI_FF_QUAD,
+    SPI_FF_OCTAL
+} spi_frame_format_t;
+
+typedef enum _spi_instruction_address_trans_mode
+{
+    SPI_AITM_STANDARD,
+    SPI_AITM_ADDR_STANDARD,
+    SPI_AITM_AS_FRAME_FORMAT
+} spi_instruction_address_trans_mode_t;
+
+typedef enum _spi_transfer_mode
+{
+    SPI_TMOD_TRANS_RECV,
+    SPI_TMOD_TRANS,
+    SPI_TMOD_RECV,
+    SPI_TMOD_EEROM
+} spi_transfer_mode_t;
+
+typedef enum _spi_transfer_width
+{
+    SPI_TRANS_CHAR = 0x1,
+    SPI_TRANS_SHORT = 0x2,
+    SPI_TRANS_INT = 0x4,
+} spi_transfer_width_t;
+
+typedef enum _spi_chip_select
+{
+    SPI_CHIP_SELECT_0,
+    SPI_CHIP_SELECT_1,
+    SPI_CHIP_SELECT_2,
+    SPI_CHIP_SELECT_3,
+    SPI_CHIP_SELECT_MAX,
+} spi_chip_select_t;
+
+typedef enum /* command codes for SPI slave mode; presumably the values carried in spi_slave_command_t.cmd - TODO confirm */
+{
+    WRITE_CONFIG,
+    READ_CONFIG,
+    WRITE_DATA_BYTE,
+    READ_DATA_BYTE,
+    WRITE_DATA_BLOCK,
+    READ_DATA_BLOCK,
+} spi_slave_command_e;
+
+typedef struct /* one slave-mode command; NOTE(review): field meanings are inferred from names only - confirm in spi.c */
+{
+    uint8_t cmd; /* presumably a spi_slave_command_e value */
+    uint8_t err;
+    uint32_t addr;
+    uint32_t len;
+} spi_slave_command_t;
+
+typedef enum /* slave-mode state; this is the type of spi_slave_instance_t.status */
+{
+    IDLE,
+    COMMAND,
+    TRANSFER,
+} spi_slave_status_e;
+
+typedef int (*spi_slave_receive_callback_t)(void *ctx); /* callback type accepted by spi_slave_config and spi_slave_dual_config */
+
+typedef struct _spi_slave_instance /* slave-mode state; spi.c keeps one static instance of this type (g_instance) */
+{
+    uint8_t int_pin; /* presumably the int_pin given to spi_slave_config - TODO confirm */
+    uint8_t ready_pin; /* presumably the ready_pin given to spi_slave_config - TODO confirm */
+    dmac_channel_number_t dmac_channel;
+    uint8_t dfs;
+    uint8_t slv_oe;
+    uint8_t work_mode;
+    size_t data_bit_length;
+    volatile spi_slave_status_e status; /* IDLE / COMMAND / TRANSFER */
+    volatile spi_slave_command_t command;
+    volatile uint8_t *config_ptr; /* presumably the data buffer given to spi_slave_config - TODO confirm */
+    uint32_t config_len; /* presumably the length of that buffer - TODO confirm */
+    spi_slave_receive_callback_t callback;
+    uint8_t is_dual; /* NOTE(review): presumably set by spi_slave_dual_config - confirm */
+    uint8_t mosi_pin;
+    uint8_t miso_pin;
+} spi_slave_instance_t;
+
+typedef struct _spi_data_t /* transfer description handed (by value) to spi_handle_data_dma */
+{
+    dmac_channel_number_t tx_channel;
+    dmac_channel_number_t rx_channel;
+    uint32_t *tx_buf;
+    size_t tx_len; /* NOTE(review): unit (bytes vs. frames) is not visible here - confirm in spi.c */
+    uint32_t *rx_buf;
+    size_t rx_len; /* NOTE(review): unit (bytes vs. frames) is not visible here - confirm in spi.c */
+    spi_transfer_mode_t transfer_mode;
+    bool fill_mode;
+} spi_data_t;
+
+extern volatile spi_t *const spi[4]; /* controller register blocks, indexed by spi_device_num_t; defined in spi.c */
+
+/**
+ * @brief       Set spi configuration
+ *
+ * @param[in]   spi_num             Spi bus number (spi.c asserts it is not SPI_DEVICE_2)
+ * @param[in]   work_mode           Spi work mode
+ * @param[in]   frame_format        Spi frame format
+ * @param[in]   data_bit_length     Spi data bit length (spi.c asserts 4..32)
+ * @param[in]   endian              0:little-endian 1:big-endian
+ *
+ * @return      Void
+ */
+void spi_init(spi_device_num_t spi_num, spi_work_mode_t work_mode, spi_frame_format_t frame_format,
+              size_t data_bit_length, uint32_t endian);
+
+/**
+ * @brief       Set multiline configuration
+ *
+ * @param[in]   spi_num                                 Spi bus number
+ * @param[in]   instruction_length                      Instruction length; spi.c accepts 0, 4, 8 or 16
+ * @param[in]   address_length                          Address length; spi.c asserts a multiple of 4, at most 60
+ * @param[in]   wait_cycles                             Wait cycles; spi.c asserts a value below 32
+ * @param[in]   instruction_address_trans_mode          How instruction and address are transmitted
+ *
+ * @return      Void
+ */
+void spi_init_non_standard(spi_device_num_t spi_num, uint32_t instruction_length, uint32_t address_length,
+                           uint32_t wait_cycles, spi_instruction_address_trans_mode_t instruction_address_trans_mode);
+
+/**
+ * @brief       Spi send data
+ *
+ * @param[in]   spi_num         Spi bus number
+ * @param[in]   chip_select     Spi chip select
+ * @param[in]   cmd_buff        Spi command buffer point
+ * @param[in]   cmd_len         Spi command length
+ * @param[in]   tx_buff         Spi transmit buffer point
+ * @param[in]   tx_len          Spi transmit buffer length
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_send_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
+                            size_t cmd_len, const uint8_t *tx_buff, size_t tx_len);
+
+/**
+ * @brief       Spi receive data
+ *
+ * @param[in]   spi_num             Spi bus number
+ * @param[in]   chip_select         Spi chip select
+ * @param[in]   cmd_buff            Spi command buffer point
+ * @param[in]   cmd_len             Spi command length
+ * @param[out]  rx_buff             Spi receive buffer point
+ * @param[in]   rx_len              Spi receive buffer length
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_receive_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
+                               size_t cmd_len, uint8_t *rx_buff, size_t rx_len);
+
+/**
+ * @brief       Spi special receive data
+ *
+ * @param[in]   spi_num         Spi bus number
+ * @param[in]   chip_select     Spi chip select
+ * @param[in]   cmd_buff        Spi command buffer point
+ * @param[in]   cmd_len         Spi command length
+ * @param[out]  rx_buff         Spi receive buffer point
+ * @param[in]   rx_len          Spi receive buffer length
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_receive_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
+                               size_t cmd_len, uint8_t *rx_buff, size_t rx_len);
+
+/**
+ * @brief       Spi special send data
+ *
+ * @param[in]   spi_num         Spi bus number
+ * @param[in]   chip_select     Spi chip select
+ * @param[in]   cmd_buff        Spi command buffer point
+ * @param[in]   cmd_len         Spi command length
+ * @param[in]   tx_buff         Spi transmit buffer point
+ * @param[in]   tx_len          Spi transmit buffer length
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_send_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
+                            size_t cmd_len, const uint8_t *tx_buff, size_t tx_len);
+
+/**
+ * @brief       Spi send data by dma
+ *
+ * @param[in]   channel_num     Dmac channel number
+ * @param[in]   spi_num         Spi bus number
+ * @param[in]   chip_select     Spi chip select
+ * @param[in]   cmd_buff        Spi command buffer point
+ * @param[in]   cmd_len         Spi command length
+ * @param[in]   tx_buff         Spi transmit buffer point
+ * @param[in]   tx_len          Spi transmit buffer length
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_send_data_standard_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
+                                spi_chip_select_t chip_select,
+                                const uint8_t *cmd_buff, size_t cmd_len, const uint8_t *tx_buff, size_t tx_len);
+
+/**
+ * @brief       Spi receive data by dma
+ *
+ * @param[in]   dma_send_channel_num        Dmac write channel number
+ * @param[in]   dma_receive_channel_num     Dmac read channel number
+ * @param[in]   spi_num                     Spi bus number
+ * @param[in]   chip_select                 Spi chip select
+ * @param[in]   cmd_buff                    Spi command buffer point
+ * @param[in]   cmd_len                     Spi command length
+ * @param[out]  rx_buff                     Spi receive buffer point
+ * @param[in]   rx_len                      Spi receive buffer length
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
+                                   dmac_channel_number_t dma_receive_channel_num,
+                                   spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
+                                   size_t cmd_len, uint8_t *rx_buff, size_t rx_len);
+
+/**
+ * @brief       Spi special send data by dma
+ *
+ * @param[in]   channel_num     Dmac channel number
+ * @param[in]   spi_num         Spi bus number
+ * @param[in]   chip_select     Spi chip select
+ * @param[in]   cmd_buff        Spi command buffer point
+ * @param[in]   cmd_len         Spi command length
+ * @param[in]   tx_buff         Spi transmit buffer point
+ * @param[in]   tx_len          Spi transmit buffer length
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
+                                spi_chip_select_t chip_select,
+                                const uint32_t *cmd_buff, size_t cmd_len, const uint8_t *tx_buff, size_t tx_len);
+
+/**
+ * @brief       Spi special receive data by dma
+ *
+ * @param[in]   dma_send_channel_num        Dmac write channel number
+ * @param[in]   dma_receive_channel_num     Dmac read channel number
+ * @param[in]   spi_num                     Spi bus number
+ * @param[in]   chip_select                 Spi chip select
+ * @param[in]   cmd_buff                    Spi command buffer point
+ * @param[in]   cmd_len                     Spi command length
+ * @param[out]  rx_buff                     Spi receive buffer point
+ * @param[in]   rx_len                      Spi receive buffer length
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_receive_data_multiple_dma(dmac_channel_number_t dma_send_channel_num,
+                                   dmac_channel_number_t dma_receive_channel_num,
+                                   spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
+                                   size_t cmd_len, uint8_t *rx_buff, size_t rx_len);
+
+/**
+ * @brief       Spi fill dma
+ *
+ * @param[in]   channel_num     Dmac channel number
+ * @param[in]   spi_num         Spi bus number
+ * @param[in]   chip_select     Spi chip select
+ * @param[in]   tx_buff         Spi transmit buffer point
+ * @param[in]   tx_len          Spi transmit buffer length
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_fill_data_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num, spi_chip_select_t chip_select,
+                       const uint32_t *tx_buff, size_t tx_len);
+
+/**
+ * @brief       Spi normal send by dma
+ *
+ * @param[in]   channel_num         Dmac channel number
+ * @param[in]   spi_num             Spi bus number
+ * @param[in]   chip_select         Spi chip select
+ * @param[in]   tx_buff             Spi transmit buffer point
+ * @param[in]   tx_len              Spi transmit buffer length
+ * @param[in]   spi_transfer_width  Spi transfer width
+ *
+ * @return      Void (no status is reported to the caller)
+ */
+void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
+                              spi_chip_select_t chip_select,
+                              const void *tx_buff, size_t tx_len, spi_transfer_width_t spi_transfer_width);
+
+/**
+ * @brief       Set spi clock rate
+ *
+ * The divider is computed in spi.c as (source clock / spi_clk) and clamped
+ * to the range 2..65534 before it is written to the baudr register.
+ *
+ * @param[in]   spi_num         Spi bus number
+ * @param[in]   spi_clk         Requested spi clock rate
+ *
+ * @return      The real spi clock rate (source clock / applied divider)
+ */
+uint32_t spi_set_clk_rate(spi_device_num_t spi_num, uint32_t spi_clk);
+
+/**
+ * @brief       Spi full duplex send receive data by dma
+ *
+ * @param[in]   dma_send_channel_num          Dmac write channel number
+ * @param[in]   dma_receive_channel_num       Dmac read channel number
+ * @param[in]   spi_num                       Spi bus number
+ * @param[in]   chip_select                   Spi chip select
+ * @param[in]   tx_buf                        Spi send buffer
+ * @param[in]   tx_len                        Spi send buffer length
+ * @param[out]  rx_buf                        Spi receive buffer
+ * @param[in]   rx_len                        Spi receive buffer length
+ *
+ * @return      Void
+ */
+void spi_dup_send_receive_data_dma(dmac_channel_number_t dma_send_channel_num,
+                                   dmac_channel_number_t dma_receive_channel_num,
+                                   spi_device_num_t spi_num, spi_chip_select_t chip_select,
+                                   const uint8_t *tx_buf, size_t tx_len, uint8_t *rx_buf, size_t rx_len);
+
+/**
+ * @brief       Set spi slave configuration
+ *
+ * @param[in]   int_pin             SPI master starts sending data interrupt.
+ * @param[in]   ready_pin           SPI slave ready.
+ * @param[in]   dmac_channel        Dmac channel number for block.
+ * @param[in]   data_bit_length     Spi data bit length
+ * @param[in]   data                SPI slave device data buffer.
+ * @param[in]   len                 The length of SPI slave device data buffer.
+ * @param[in]   callback            Callback of spi slave, of type int (*)(void *ctx).
+ *
+ * @return      Void
+ */
+void spi_slave_config(uint8_t int_pin, uint8_t ready_pin, dmac_channel_number_t dmac_channel, size_t data_bit_length, uint8_t *data, uint32_t len, spi_slave_receive_callback_t callback);
+
+void spi_slave_dual_config(uint8_t int_pin, /* takes the spi_slave_config parameters plus mosi_pin / miso_pin */
+                           uint8_t ready_pin,
+                           uint8_t mosi_pin, /* NOTE(review): pin numbering scheme is not visible in this header - confirm in spi.c */
+                           uint8_t miso_pin, /* NOTE(review): pin numbering scheme is not visible in this header - confirm in spi.c */
+                           dmac_channel_number_t dmac_channel,
+                           size_t data_bit_length,
+                           uint8_t *data,
+                           uint32_t len,
+                           spi_slave_receive_callback_t callback);
+
+/**
+ * @brief       Spi handle transfer data operations
+ *
+ * @param[in]   spi_num         Spi bus number
+ * @param[in]   chip_select     Spi chip select
+ * @param[in]   data            Spi transfer data information (spi_data_t, passed by value)
+ * @param[in]   cb              Spi DMA callback
+ *
+ * @return      Void
+ */
+void spi_handle_data_dma(spi_device_num_t spi_num, spi_chip_select_t chip_select, spi_data_t data, plic_interrupt_t *cb);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DRIVER_SPI_H */

+ 181 - 181
lib/drivers/iomem.c

@@ -1,181 +1,181 @@
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "iomem.h"
-#include "printf.h"
-#include "atomic.h"
-
-#define IOMEM_BLOCK_SIZE 256
-
-typedef struct _iomem_malloc_t
-{
-    void (*init)();
-    uint32_t (*unused)();
-    uint8_t *membase;
-    uint32_t memsize;
-    uint32_t memtblsize;
-    uint16_t *memmap;
-    uint8_t  memrdy;
-    _lock_t *lock;
-} iomem_malloc_t;
-
-static _lock_t iomem_lock;
-
-static void iomem_init();
-static uint32_t k_unused();
-extern char *_ioheap_line;
-extern char *_heap_line;
-extern char _heap_start[];
-extern char *_heap_cur;
-
-iomem_malloc_t malloc_cortol = 
-{
-    iomem_init,
-    k_unused,
-    NULL,
-    0,
-    0,
-    NULL,
-    0,
-    &iomem_lock
-};
-
-static void iomem_set(void *s, uint8_t c, uint32_t num)
-{
-    uint8_t *xs = s;
-    while(num--)
-        *xs++=c;
-}
-
-static void iomem_init()
-{
-    malloc_cortol.membase = (uint8_t *)((uintptr_t)_heap_line-0x40000000);
-    malloc_cortol.memsize = (uint32_t)_ioheap_line - (uint32_t)malloc_cortol.membase;
-
-    malloc_cortol.memtblsize = malloc_cortol.memsize / IOMEM_BLOCK_SIZE;
-    malloc_cortol.memmap = (uint16_t *)malloc(malloc_cortol.memtblsize * 2);
-    mb();
-
-    malloc_cortol.membase = (uint8_t *)((uintptr_t)_heap_line-0x40000000);
-    malloc_cortol.memsize = (uint32_t)_ioheap_line - (uint32_t)malloc_cortol.membase;
-    malloc_cortol.memtblsize = malloc_cortol.memsize / IOMEM_BLOCK_SIZE;
-
-    iomem_set(malloc_cortol.memmap, 0, malloc_cortol.memtblsize * 2);
-    iomem_set(malloc_cortol.membase, 0, malloc_cortol.memsize);
-    malloc_cortol.memrdy = 1;
-}
-
-static uint32_t k_unused()
-{
-    uint32_t unused=0;
-    unused = (uintptr_t)_ioheap_line + 0x40000000 - (uintptr_t)_heap_line;
-
-    return unused;
-}
-
-static uint32_t k_malloc(uint32_t size)
-{
-    signed long offset = 0;
-    uint32_t xmemb;
-    uint32_t kmemb = 0;
-
-    if(!malloc_cortol.memrdy)
-        malloc_cortol.init();
-    if(size==0)
-        return 0XFFFFFFFF;
-    xmemb=size / IOMEM_BLOCK_SIZE;
-    if(size % IOMEM_BLOCK_SIZE)
-        xmemb++;
-    for(offset=malloc_cortol.memtblsize-1; offset>=0; offset--)
-    {
-        if(!malloc_cortol.memmap[offset])
-        {
-            kmemb++;
-        }
-        else 
-        {
-            offset = offset - malloc_cortol.memmap[offset] + 1;
-            kmemb=0;
-        }
-        if(kmemb==xmemb)
-        {
-            malloc_cortol.memmap[offset] = xmemb;
-            malloc_cortol.memmap[offset+xmemb-1] = xmemb;
-            return (offset * IOMEM_BLOCK_SIZE);
-        }
-    }
-    return 0XFFFFFFFF;
-}
-
-static uint8_t k_free(uint32_t offset)
-{
-    if(!malloc_cortol.memrdy)
-    {
-        malloc_cortol.init();
-        return 1;
-    }  
-    if(offset < malloc_cortol.memsize)
-    {  
-        int index=offset / IOMEM_BLOCK_SIZE;
-        int nmemb=malloc_cortol.memmap[index];
-
-        malloc_cortol.memmap[index] = 0;
-        malloc_cortol.memmap[index+nmemb-1] = 0;
-
-        if((uintptr_t)_ioheap_line == (uintptr_t)malloc_cortol.membase + offset)
-        {
-            _ioheap_line = (char *)((uintptr_t)_ioheap_line + nmemb * IOMEM_BLOCK_SIZE);
-        }
-        return 0;
-    }
-    else 
-        return 2;
-}  
-
-void iomem_free(void *paddr)
-{
-    uint32_t offset;
-    if(paddr == NULL)
-        return;
-    _lock_acquire_recursive(malloc_cortol.lock);
-    offset=(uintptr_t)paddr - (uintptr_t)malloc_cortol.membase;
-    k_free(offset);
-    _lock_release_recursive(malloc_cortol.lock);
-}
-
-void *iomem_malloc(uint32_t size)
-{
-    _lock_acquire_recursive(malloc_cortol.lock);
-    uint32_t offset;
-    offset=k_malloc(size);
-    if(offset == 0XFFFFFFFF)
-    {
-        printk("IOMEM malloc OUT of MEMORY!\r\n");
-        _lock_release_recursive(malloc_cortol.lock);
-         return NULL;
-    }
-    else 
-    {
-        if((uintptr_t)_ioheap_line > (uintptr_t)malloc_cortol.membase + offset)
-        {
-            _ioheap_line = (char *)((uintptr_t)malloc_cortol.membase + offset);
-            if((uintptr_t)_ioheap_line < (uintptr_t)_heap_line-0x40000000)
-            {
-                printk("Error: OUT of MEMORY!\r\n");
-                printk("_heap_line = %p\r\n", _heap_line);
-                printk("_ioheap_line = %p\r\n", _ioheap_line);
-                while(1)
-                    ;
-            }
-        };
-        _lock_release_recursive(malloc_cortol.lock);
-        return (void*)((uintptr_t)malloc_cortol.membase + offset);
-    }
-}
-
-uint32_t iomem_unused()
-{
-    return malloc_cortol.unused();
-}
-
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "iomem.h"
+#include "printf.h"
+#include "atomic.h"
+
+#define IOMEM_BLOCK_SIZE 256
+
+typedef struct _iomem_malloc_t /* bookkeeping for the block-based IO-memory allocator in this file */
+{
+    void (*init)(); /* initialised to iomem_init */
+    uint32_t (*unused)(); /* initialised to k_unused */
+    uint8_t *membase; /* start of the managed region; iomem_init computes it as _heap_line - 0x40000000 */
+    uint32_t memsize; /* bytes between membase and _ioheap_line at init time */
+    uint32_t memtblsize; /* number of blocks: memsize / IOMEM_BLOCK_SIZE */
+    uint16_t *memmap; /* one entry per block; k_malloc stores the block count in the first and last entry of an allocation, 0 elsewhere */
+    uint8_t  memrdy; /* set to 1 at the end of iomem_init */
+    _lock_t *lock; /* taken recursively by iomem_malloc and iomem_free */
+} iomem_malloc_t;
+
+static _lock_t iomem_lock; /* referenced through malloc_cortol.lock */
+
+static void iomem_init();
+static uint32_t k_unused();
+extern char *_ioheap_line; /* lowest address handed out so far; lowered by iomem_malloc, raised by k_free */
+extern char *_heap_line; /* NOTE(review): presumably the top of the regular heap - defined outside this file, confirm */
+extern char _heap_start[]; /* not referenced in this file */
+extern char *_heap_cur; /* not referenced in this file */
+
+iomem_malloc_t malloc_cortol = /* the single allocator state; filled in lazily by iomem_init */
+{
+    iomem_init, /* init */
+    k_unused, /* unused */
+    NULL, /* membase */
+    0, /* memsize */
+    0, /* memtblsize */
+    NULL, /* memmap */
+    0, /* memrdy */
+    &iomem_lock /* lock */
+};
+
+static void iomem_set(void *s, uint8_t c, uint32_t num) /* byte-wise fill: writes c to num bytes starting at s */
+{
+    uint8_t *xs = s;
+    while(num--)
+        *xs++=c;
+}
+
+static void iomem_init() /* one-time setup: sizes the managed region, allocates the block map, zeroes both, sets memrdy */
+{
+    malloc_cortol.membase = (uint8_t *)((uintptr_t)_heap_line-0x40000000);
+    malloc_cortol.memsize = (uint32_t)_ioheap_line - (uint32_t)malloc_cortol.membase;
+
+    malloc_cortol.memtblsize = malloc_cortol.memsize / IOMEM_BLOCK_SIZE;
+    malloc_cortol.memmap = (uint16_t *)malloc(malloc_cortol.memtblsize * 2); /* 2 bytes (one uint16_t) per block; NOTE(review): the result is not checked for NULL */
+    mb();
+
+    malloc_cortol.membase = (uint8_t *)((uintptr_t)_heap_line-0x40000000); /* same three values recomputed after malloc(); presumably because malloc() can move _heap_line - TODO confirm */
+    malloc_cortol.memsize = (uint32_t)_ioheap_line - (uint32_t)malloc_cortol.membase;
+    malloc_cortol.memtblsize = malloc_cortol.memsize / IOMEM_BLOCK_SIZE;
+
+    iomem_set(malloc_cortol.memmap, 0, malloc_cortol.memtblsize * 2); /* mark every block free */
+    iomem_set(malloc_cortol.membase, 0, malloc_cortol.memsize); /* zero the whole managed region */
+    malloc_cortol.memrdy = 1;
+}
+
+static uint32_t k_unused() /* returns _ioheap_line + 0x40000000 - _heap_line, truncated to 32 bits */
+{
+    uint32_t unused=0;
+    unused = (uintptr_t)_ioheap_line + 0x40000000 - (uintptr_t)_heap_line;
+
+    return unused;
+}
+
+static uint32_t k_malloc(uint32_t size) /* reserves ceil(size / IOMEM_BLOCK_SIZE) blocks; returns the byte offset from membase, or 0XFFFFFFFF on failure */
+{
+    signed long offset = 0;
+    uint32_t xmemb; /* blocks needed */
+    uint32_t kmemb = 0; /* consecutive free blocks seen so far */
+
+    if(!malloc_cortol.memrdy)
+        malloc_cortol.init(); /* lazy initialisation on first use */
+    if(size==0)
+        return 0XFFFFFFFF;
+    xmemb=size / IOMEM_BLOCK_SIZE;
+    if(size % IOMEM_BLOCK_SIZE)
+        xmemb++; /* round up to whole blocks */
+    for(offset=malloc_cortol.memtblsize-1; offset>=0; offset--) /* scan from the highest block downwards */
+    {
+        if(!malloc_cortol.memmap[offset])
+        {
+            kmemb++;
+        }
+        else 
+        {
+            offset = offset - malloc_cortol.memmap[offset] + 1; /* entry holds the allocation's block count: jump to its first block; the loop decrement then steps below it */
+            kmemb=0;
+        }
+        if(kmemb==xmemb)
+        {
+            malloc_cortol.memmap[offset] = xmemb; /* start marker; NOTE(review): stored as uint16_t, so counts above 65535 would truncate */
+            malloc_cortol.memmap[offset+xmemb-1] = xmemb; /* end marker, the one the downward scan meets first */
+            return (offset * IOMEM_BLOCK_SIZE);
+        }
+    }
+    return 0XFFFFFFFF;
+}
+
+/*
+ * Release the allocation that starts `offset` bytes above membase.
+ *
+ * Returns 0 on success, 1 if the allocator had not been initialised yet
+ * (it is initialised as a side effect and nothing is freed), and 2 if
+ * `offset` does not identify the first block of a live allocation.
+ */
+static uint8_t k_free(uint32_t offset)
+{
+    if(!malloc_cortol.memrdy)
+    {
+        malloc_cortol.init();
+        return 1;
+    }
+    if(offset >= malloc_cortol.memsize)
+        return 2;
+
+    uint32_t index = offset / IOMEM_BLOCK_SIZE;
+    /* memsize need not be a multiple of the block size, so the trailing
+     * partial block has no map entry. */
+    if(index >= malloc_cortol.memtblsize)
+        return 2;
+
+    uint32_t nmemb = malloc_cortol.memmap[index];
+    /* Reject double frees and pointers that are not the start of an
+     * allocation: a zero count would make the end-marker write below land on
+     * memmap[index - 1], and a stale count could index past the table. */
+    if(nmemb == 0 || nmemb > malloc_cortol.memtblsize - index ||
+       malloc_cortol.memmap[index + nmemb - 1] != nmemb)
+        return 2;
+
+    /* Clear the start and end markers written by k_malloc. */
+    malloc_cortol.memmap[index] = 0;
+    malloc_cortol.memmap[index + nmemb - 1] = 0;
+
+    /* If this was the lowest allocation, move the IO heap line back up past it. */
+    if((uintptr_t)_ioheap_line == (uintptr_t)malloc_cortol.membase + offset)
+    {
+        _ioheap_line = (char *)((uintptr_t)_ioheap_line + nmemb * IOMEM_BLOCK_SIZE);
+    }
+    return 0;
+}
+
+void iomem_free(void *paddr) /* frees a block returned by iomem_malloc; NULL is ignored; k_free's status code is discarded */
+{
+    uint32_t offset;
+    if(paddr == NULL)
+        return;
+    _lock_acquire_recursive(malloc_cortol.lock);
+    offset=(uintptr_t)paddr - (uintptr_t)malloc_cortol.membase; /* byte offset inside the managed region; wraps to a large value if paddr is below membase */
+    k_free(offset);
+    _lock_release_recursive(malloc_cortol.lock);
+}
+
+void *iomem_malloc(uint32_t size) /* returns membase + offset of a new allocation, or NULL (after printing a message) when k_malloc fails */
+{
+    _lock_acquire_recursive(malloc_cortol.lock);
+    uint32_t offset;
+    offset=k_malloc(size);
+    if(offset == 0XFFFFFFFF) /* k_malloc's failure sentinel (also returned for size == 0) */
+    {
+        printk("IOMEM malloc OUT of MEMORY!\r\n");
+        _lock_release_recursive(malloc_cortol.lock);
+         return NULL;
+    }
+    else 
+    {
+        if((uintptr_t)_ioheap_line > (uintptr_t)malloc_cortol.membase + offset)
+        {
+            _ioheap_line = (char *)((uintptr_t)malloc_cortol.membase + offset); /* new allocation is the lowest so far: lower the IO heap line to it */
+            if((uintptr_t)_ioheap_line < (uintptr_t)_heap_line-0x40000000) /* IO heap line fell below _heap_line - 0x40000000: treated as fatal */
+            {
+                printk("Error: OUT of MEMORY!\r\n");
+                printk("_heap_line = %p\r\n", _heap_line);
+                printk("_ioheap_line = %p\r\n", _ioheap_line);
+                while(1) /* spins forever with the lock still held */
+                    ;
+            }
+        };
+        _lock_release_recursive(malloc_cortol.lock);
+        return (void*)((uintptr_t)malloc_cortol.membase + offset);
+    }
+}
+
+uint32_t iomem_unused() /* public wrapper: returns the value of malloc_cortol.unused(), which is initialised to k_unused */
+{
+    return malloc_cortol.unused();
+}
+

+ 1574 - 1574
lib/drivers/spi.c

@@ -1,1574 +1,1574 @@
-/* Copyright 2018 Canaan Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <bsp.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#include "fpioa.h"
-#include "gpiohs.h"
-#include "platform.h"
-#include "spi.h"
-#include "sysctl.h"
-#include "utils.h"
-#include "iomem.h"
-
-volatile spi_t *const spi[4] =
-    {
-        (volatile spi_t *)SPI0_BASE_ADDR,
-        (volatile spi_t *)SPI1_BASE_ADDR,
-        (volatile spi_t *)SPI_SLAVE_BASE_ADDR,
-        (volatile spi_t *)SPI3_BASE_ADDR};
-
-typedef struct _spi_dma_context
-{
-    uint8_t *buffer;
-    size_t buf_len;
-    uint32_t *malloc_buffer;
-    spi_transfer_mode_t int_mode;
-    dmac_channel_number_t dmac_channel;
-    spi_device_num_t spi_num;
-    plic_instance_t spi_int_instance;
-} spi_dma_context_t;
-
-spi_dma_context_t spi_dma_context[4];
-
-typedef struct _spi_instance_t
-{
-    spi_device_num_t spi_num;
-    spi_transfer_mode_t transfer_mode;
-    dmac_channel_number_t dmac_channel;
-    plic_instance_t spi_int_instance;
-    spinlock_t lock;
-} spi_instance_t;
-
-static spi_instance_t g_spi_instance[4];
-
-static spi_slave_instance_t g_instance;
-
-static spi_frame_format_t spi_get_frame_format(spi_device_num_t spi_num)
-{
-    uint8_t frf_offset;
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            frf_offset = 21;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            frf_offset = 22;
-            break;
-    }
-    volatile spi_t *spi_adapter = spi[spi_num];
-    return ((spi_adapter->ctrlr0 >> frf_offset) & 0x3);
-}
-
-static spi_transfer_width_t spi_get_frame_size(size_t data_bit_length)
-{
-    if(data_bit_length < 8)
-        return SPI_TRANS_CHAR;
-    else if(data_bit_length < 16)
-        return SPI_TRANS_SHORT;
-    return SPI_TRANS_INT;
-}
-
-static int spi_dma_irq(void *ctx)
-{
-    spi_instance_t *v_instance = (spi_instance_t *)ctx;
-    volatile spi_t *spi_handle = spi[v_instance->spi_num];
-    dmac_irq_unregister(v_instance->dmac_channel);
-    while((spi_handle->sr & 0x05) != 0x04)
-        ;
-    spi_handle->ser = 0x00;
-    spi_handle->ssienr = 0x00;
-    spinlock_unlock(&v_instance->lock);
-    if(v_instance->spi_int_instance.callback)
-    {
-        v_instance->spi_int_instance.callback(v_instance->spi_int_instance.ctx);
-    }
-    return 0;
-}
-
-static int spi_clk_init(uint8_t spi_num)
-{
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-    if(spi_num == 3)
-        sysctl_clock_set_clock_select(SYSCTL_CLOCK_SELECT_SPI3, 1);
-    sysctl_clock_enable(SYSCTL_CLOCK_SPI0 + spi_num);
-    sysctl_clock_set_threshold(SYSCTL_THRESHOLD_SPI0 + spi_num, 0);
-    return 0;
-}
-
-static void spi_set_tmod(uint8_t spi_num, uint32_t tmod)
-{
-    configASSERT(spi_num < SPI_DEVICE_MAX);
-    volatile spi_t *spi_handle = spi[spi_num];
-    uint8_t tmod_offset = 0;
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-        case 2:
-            tmod_offset = 8;
-            break;
-        case 3:
-        default:
-            tmod_offset = 10;
-            break;
-    }
-    set_bit(&spi_handle->ctrlr0, 3 << tmod_offset, tmod << tmod_offset);
-}
-
-void spi_init(spi_device_num_t spi_num, spi_work_mode_t work_mode, spi_frame_format_t frame_format,
-              size_t data_bit_length, uint32_t endian)
-{
-    configASSERT(data_bit_length >= 4 && data_bit_length <= 32);
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-    spi_clk_init(spi_num);
-
-    uint8_t dfs_offset, frf_offset, work_mode_offset;
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            dfs_offset = 16;
-            frf_offset = 21;
-            work_mode_offset = 6;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            dfs_offset = 0;
-            frf_offset = 22;
-            work_mode_offset = 8;
-            break;
-    }
-
-    switch(frame_format)
-    {
-        case SPI_FF_DUAL:
-            configASSERT(data_bit_length % 2 == 0);
-            break;
-        case SPI_FF_QUAD:
-            configASSERT(data_bit_length % 4 == 0);
-            break;
-        case SPI_FF_OCTAL:
-            configASSERT(data_bit_length % 8 == 0);
-            break;
-        default:
-            break;
-    }
-    volatile spi_t *spi_adapter = spi[spi_num];
-    if(spi_adapter->baudr == 0)
-        spi_adapter->baudr = 0x14;
-    spi_adapter->imr = 0x00;
-    spi_adapter->dmacr = 0x00;
-    spi_adapter->dmatdlr = 0x10;
-    spi_adapter->dmardlr = 0x00;
-    spi_adapter->ser = 0x00;
-    spi_adapter->ssienr = 0x00;
-    spi_adapter->ctrlr0 = (work_mode << work_mode_offset) | (frame_format << frf_offset) | ((data_bit_length - 1) << dfs_offset);
-    spi_adapter->spi_ctrlr0 = 0;
-    spi_adapter->endian = endian;
-}
-
-void spi_init_non_standard(spi_device_num_t spi_num, uint32_t instruction_length, uint32_t address_length,
-                           uint32_t wait_cycles, spi_instruction_address_trans_mode_t instruction_address_trans_mode)
-{
-    configASSERT(wait_cycles < (1 << 5));
-    configASSERT(instruction_address_trans_mode < 3);
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-    volatile spi_t *spi_handle = spi[spi_num];
-    uint32_t inst_l = 0;
-    switch(instruction_length)
-    {
-        case 0:
-            inst_l = 0;
-            break;
-        case 4:
-            inst_l = 1;
-            break;
-        case 8:
-            inst_l = 2;
-            break;
-        case 16:
-            inst_l = 3;
-            break;
-        default:
-            configASSERT(!"Invalid instruction length");
-            break;
-    }
-
-    configASSERT(address_length % 4 == 0 && address_length <= 60);
-    uint32_t addr_l = address_length / 4;
-
-    spi_handle->spi_ctrlr0 = (wait_cycles << 11) | (inst_l << 8) | (addr_l << 2) | instruction_address_trans_mode;
-}
-
-uint32_t spi_set_clk_rate(spi_device_num_t spi_num, uint32_t spi_clk)
-{
-    uint32_t spi_baudr = sysctl_clock_get_freq(SYSCTL_CLOCK_SPI0 + spi_num) / spi_clk;
-    if(spi_baudr < 2)
-    {
-        spi_baudr = 2;
-    } else if(spi_baudr > 65534)
-    {
-        spi_baudr = 65534;
-    }
-    volatile spi_t *spi_adapter = spi[spi_num];
-    spi_adapter->baudr = spi_baudr;
-    return sysctl_clock_get_freq(SYSCTL_CLOCK_SPI0 + spi_num) / spi_baudr;
-}
-
-void spi_send_data_normal(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *tx_buff, size_t tx_len)
-{
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-
-    size_t index, fifo_len;
-    spi_set_tmod(spi_num, SPI_TMOD_TRANS);
-
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    uint8_t dfs_offset;
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            dfs_offset = 16;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            dfs_offset = 0;
-            break;
-    }
-    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
-    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
-
-    uint8_t v_misalign_flag = 0;
-    uint32_t v_send_data;
-    if((uintptr_t)tx_buff % frame_width)
-        v_misalign_flag = 1;
-
-    spi_handle->ssienr = 0x01;
-    spi_handle->ser = 1U << chip_select;
-    uint32_t i = 0;
-    while(tx_len)
-    {
-        fifo_len = 32 - spi_handle->txflr;
-        fifo_len = fifo_len < tx_len ? fifo_len : tx_len;
-        switch(frame_width)
-        {
-            case SPI_TRANS_INT:
-                fifo_len = fifo_len / 4 * 4;
-                if(v_misalign_flag)
-                {
-                    for(index = 0; index < fifo_len; index += 4)
-                    {
-                        memcpy(&v_send_data, tx_buff + i, 4);
-                        spi_handle->dr[0] = v_send_data;
-                        i += 4;
-                    }
-                } else
-                {
-                    for(index = 0; index < fifo_len / 4; index++)
-                        spi_handle->dr[0] = ((uint32_t *)tx_buff)[i++];
-                }
-                break;
-            case SPI_TRANS_SHORT:
-                fifo_len = fifo_len / 2 * 2;
-                if(v_misalign_flag)
-                {
-                    for(index = 0; index < fifo_len; index += 2)
-                    {
-                        memcpy(&v_send_data, tx_buff + i, 2);
-                        spi_handle->dr[0] = v_send_data;
-                        i += 2;
-                    }
-                } else
-                {
-                    for(index = 0; index < fifo_len / 2; index++)
-                        spi_handle->dr[0] = ((uint16_t *)tx_buff)[i++];
-                }
-                break;
-            default:
-                for(index = 0; index < fifo_len; index++)
-                    spi_handle->dr[0] = tx_buff[i++];
-                break;
-        }
-        tx_len -= fifo_len;
-    }
-    while((spi_handle->sr & 0x05) != 0x04)
-        ;
-    spi_handle->ser = 0x00;
-    spi_handle->ssienr = 0x00;
-}
-
-/**
- * Send a command followed by a payload as one contiguous transfer.
- *
- * The two byte buffers are concatenated into a temporary heap buffer which
- * is handed to spi_send_data_normal() and released afterwards.
- *
- * @param spi_num     SPI device index; must be below SPI_DEVICE_MAX and not 2.
- * @param chip_select Chip-select line forwarded to spi_send_data_normal().
- * @param cmd_buff    Command bytes (cmd_len of them), sent first.
- * @param cmd_len     Number of command bytes.
- * @param tx_buff     Payload bytes (tx_len of them), sent after the command.
- * @param tx_len      Number of payload bytes.
- */
-void spi_send_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
-                            size_t cmd_len, const uint8_t *tx_buff, size_t tx_len)
-{
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-    size_t total_len = cmd_len + tx_len;
-    uint8_t *v_buf = malloc(total_len);
-    /* malloc(0) may legitimately return NULL; only a failed non-empty
-     * allocation is an error. Never write through a NULL buffer. */
-    if(v_buf == NULL && total_len != 0)
-    {
-        configASSERT(!"spi_send_data_standard: out of memory");
-        return;
-    }
-    /* Skip zero-length copies so a NULL source with length 0 stays harmless. */
-    if(cmd_len)
-        memcpy(v_buf, cmd_buff, cmd_len);
-    if(tx_len)
-        memcpy(v_buf + cmd_len, tx_buff, tx_len);
-
-    spi_send_data_normal(spi_num, chip_select, v_buf, total_len);
-    free((void *)v_buf);
-}
-
-void spi_send_data_standard_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
-                                spi_chip_select_t chip_select,
-                                const uint8_t *cmd_buff, size_t cmd_len, const uint8_t *tx_buff, size_t tx_len)
-{ /*
- * Sends a command followed by a payload through DMA.
- *
- * cmd_buff and tx_buff are byte buffers (cmd_len / tx_len are byte counts).
- * They are repacked into a temporary uint32_t array holding one frame per
- * element, command frames first, and that array is passed to
- * spi_send_data_normal_dma() with SPI_TRANS_INT so it is used as-is.
- * The frame width is derived from the 5-bit field read back from ctrlr0.
- *
- * NOTE(review): none of the malloc() results are checked before use.
- * NOTE(review): the loop index is a signed int compared against size_t.
- */
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    uint8_t dfs_offset; /* shift applied to ctrlr0 before masking the 5-bit field below */
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            dfs_offset = 16;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            dfs_offset = 0;
-            break;
-    }
-    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
-    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
-
-    uint32_t *buf;
-    size_t v_send_len; /* number of frames (uint32_t elements) in buf */
-    int i;
-    switch(frame_width)
-    {
-        case SPI_TRANS_INT:
-            buf = malloc(cmd_len + tx_len); /* frames are already 32 bits wide: byte count is the storage size */
-            for(i = 0; i < cmd_len / 4; i++)
-                buf[i] = ((uint32_t *)cmd_buff)[i]; /* NOTE(review): assumes cmd_buff is 4-byte aligned - confirm */
-            for(i = 0; i < tx_len / 4; i++)
-                buf[cmd_len / 4 + i] = ((uint32_t *)tx_buff)[i];
-            v_send_len = (cmd_len + tx_len) / 4;
-            break;
-        case SPI_TRANS_SHORT:
-            buf = malloc((cmd_len + tx_len) / 2 * sizeof(uint32_t)); /* one uint32_t slot per 16-bit frame */
-            for(i = 0; i < cmd_len / 2; i++)
-                buf[i] = ((uint16_t *)cmd_buff)[i]; /* widen each 16-bit frame to 32 bits */
-            for(i = 0; i < tx_len / 2; i++)
-                buf[cmd_len / 2 + i] = ((uint16_t *)tx_buff)[i];
-            v_send_len = (cmd_len + tx_len) / 2;
-            break;
-        default:
-            buf = malloc((cmd_len + tx_len) * sizeof(uint32_t)); /* one uint32_t slot per byte frame */
-            for(i = 0; i < cmd_len; i++)
-                buf[i] = cmd_buff[i];
-            for(i = 0; i < tx_len; i++)
-                buf[cmd_len + i] = tx_buff[i];
-            v_send_len = cmd_len + tx_len;
-            break;
-    }
-
-    spi_send_data_normal_dma(channel_num, spi_num, chip_select, buf, v_send_len, SPI_TRANS_INT); /* SPI_TRANS_INT: callee uses buf directly, no second copy */
-
-    free((void *)buf);
-}
-
-void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
-                              spi_chip_select_t chip_select,
-                              const void *tx_buff, size_t tx_len, spi_transfer_width_t spi_transfer_width)
-{ /*
- * Transmits tx_len frames from tx_buff using one DMA channel and blocks
- * until the transfer has finished.
- *
- * tx_len is a frame count: tx_buff is indexed as uint8_t, uint16_t or
- * uint32_t elements according to spi_transfer_width, and the DMA is
- * programmed for tx_len 32-bit transfers.  For 8- and 16-bit widths the
- * frames are first widened into a temporary uint32_t array; for
- * SPI_TRANS_INT the caller's buffer is handed to the DMA unchanged.
- *
- * NOTE(review): allocation results are not checked before use.
- */
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-    spi_set_tmod(spi_num, SPI_TMOD_TRANS);
-    volatile spi_t *spi_handle = spi[spi_num];
-    uint32_t *buf; /* DMA source: either a widened copy or tx_buff itself */
-    int i;
-    switch(spi_transfer_width)
-    {
-        case SPI_TRANS_SHORT:
-#if FIX_CACHE
-            buf = (uint32_t *)iomem_malloc((tx_len) * sizeof(uint32_t));
-#else
-            buf = (uint32_t *)malloc((tx_len) * sizeof(uint32_t));
-#endif
-            for(i = 0; i < tx_len; i++)
-                buf[i] = ((uint16_t *)tx_buff)[i]; /* widen 16-bit frames to 32 bits */
-            break;
-        case SPI_TRANS_INT:
-            buf = (uint32_t *)tx_buff; /* no copy: buffer is not owned here and is not freed below */
-            break;
-        case SPI_TRANS_CHAR:
-        default:
-#if FIX_CACHE
-            buf = (uint32_t *)iomem_malloc((tx_len) * sizeof(uint32_t));
-#else
-            buf = (uint32_t *)malloc((tx_len) * sizeof(uint32_t));
-#endif
-
-            for(i = 0; i < tx_len; i++)
-                buf[i] = ((uint8_t *)tx_buff)[i]; /* widen 8-bit frames to 32 bits */
-            break;
-    }
-    spi_handle->dmacr = 0x2; /*enable dma transmit*/
-    spi_handle->ssienr = 0x01;
-
-    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2); /* request id is offset by 2 per SPI device */
-
-    dmac_set_single_mode(channel_num, buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
-                         DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, tx_len); /* memory source increments, destination stays on dr[0] */
-    spi_handle->ser = 1U << chip_select; /* written after the DMA is armed */
-    dmac_wait_done(channel_num);
-    if(spi_transfer_width != SPI_TRANS_INT)
-    { /* only the widened copy was allocated by this function */
-#if FIX_CACHE
-        iomem_free((void *)buf);
-#else
-        free((void *)buf);
-#endif
-    }
-    while((spi_handle->sr & 0x05) != 0x04) /* spin until sr bit 2 is set and bit 0 is clear - presumably TX FIFO empty and not busy, confirm against the datasheet */
-        ;
-    spi_handle->ser = 0x00;
-    spi_handle->ssienr = 0x00;
-}
-
-void spi_dup_send_receive_data_dma(dmac_channel_number_t dma_send_channel_num,
-                                   dmac_channel_number_t dma_receive_channel_num,
-                                   spi_device_num_t spi_num, spi_chip_select_t chip_select,
-                                   const uint8_t *tx_buf, size_t tx_len, uint8_t *rx_buf, size_t rx_len)
-{ /*
- * Simultaneous transmit and receive using two DMA channels.
- *
- * tx_len and rx_len are byte counts; they are divided by frame_width to get
- * frame counts.  Both directions run for max(tx frames, rx frames) frames:
- * the transmit side is padded with 0xFFFFFFFF words, and only the first
- * v_rx_len received frames are copied back into rx_buf.
- *
- * NOTE(review): the division assumes the numeric value of
- * spi_transfer_width_t is the frame size in bytes - confirm.
- * NOTE(review): unlike the sibling functions there is no configASSERT on
- * spi_num at entry, and allocation results are not checked.
- */
-    spi_set_tmod(spi_num, SPI_TMOD_TRANS_RECV);
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    uint8_t dfs_offset; /* shift applied to ctrlr0 before masking the 5-bit field below */
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            dfs_offset = 16;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            dfs_offset = 0;
-            break;
-    }
-    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
-    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
-    size_t v_tx_len = tx_len / frame_width; /* frames to transmit */
-    size_t v_rx_len = rx_len / frame_width; /* frames to hand back to the caller */
-
-    size_t v_max_len = v_tx_len > v_rx_len ? v_tx_len : v_rx_len; /* both DMA channels move this many words */
-
-#if FIX_CACHE
-    uint32_t *v_tx_buf = iomem_malloc(v_max_len * 4);
-    uint32_t *v_rx_buf = iomem_malloc(v_max_len * 4);
-#else
-    uint32_t *v_tx_buf = malloc(v_max_len * 4);
-    uint32_t *v_rx_buf = malloc(v_max_len * 4);
-#endif
-    uint32_t i = 0;
-    switch(frame_width)
-    { /* widen each outgoing frame to one uint32_t, then pad up to v_max_len */
-        case SPI_TRANS_INT:
-            for(i = 0; i < v_tx_len; i++)
-            {
-                v_tx_buf[i] = ((uint32_t *)tx_buf)[i];
-            }
-            if(v_max_len > v_tx_len)
-            {
-                while(i < v_max_len)
-                {
-                    v_tx_buf[i++] = 0xFFFFFFFF;
-                }
-            }
-            break;
-        case SPI_TRANS_SHORT:
-            for(i = 0; i < v_tx_len; i++)
-            {
-                v_tx_buf[i] = ((uint16_t *)tx_buf)[i];
-            }
-            if(v_max_len > v_tx_len)
-            {
-                while(i < v_max_len)
-                {
-                    v_tx_buf[i++] = 0xFFFFFFFF;
-                }
-            }
-            break;
-        default:
-            for(i = 0; i < v_tx_len; i++)
-            {
-                v_tx_buf[i] = tx_buf[i];
-            }
-            if(v_max_len > v_tx_len)
-            {
-                while(i < v_max_len)
-                {
-                    v_tx_buf[i++] = 0xFFFFFFFF;
-                }
-            }
-            break;
-    }
-
-    spi_handle->dmacr = 0x3; /* 0x3 here versus 0x2 for transmit-only elsewhere in this file - presumably enables both DMA directions */
-    spi_handle->ssienr = 0x01;
-
-    sysctl_dma_select((sysctl_dma_channel_t)dma_send_channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
-    sysctl_dma_select((sysctl_dma_channel_t)dma_receive_channel_num, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
-
-    dmac_set_single_mode(dma_receive_channel_num, (void *)(&spi_handle->dr[0]), v_rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
-                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, v_max_len); /* receive channel is armed before the send channel */
-
-    dmac_set_single_mode(dma_send_channel_num, v_tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
-                         DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, v_max_len);
-
-    spi_handle->ser = 1U << chip_select;
-    dmac_wait_done(dma_send_channel_num);
-    dmac_wait_done(dma_receive_channel_num);
-
-    spi_handle->ser = 0x00;
-    spi_handle->ssienr = 0x00;
-
-    switch(frame_width)
-    { /* narrow the received words back to the caller's frame width */
-        case SPI_TRANS_INT:
-            for(i = 0; i < v_rx_len; i++)
-                ((uint32_t *)rx_buf)[i] = v_rx_buf[i];
-            break;
-        case SPI_TRANS_SHORT:
-            for(i = 0; i < v_rx_len; i++)
-                ((uint16_t *)rx_buf)[i] = v_rx_buf[i];
-            break;
-        default:
-            for(i = 0; i < v_rx_len; i++)
-                rx_buf[i] = v_rx_buf[i];
-            break;
-    }
-#if FIX_CACHE
-    iomem_free(v_tx_buf);
-    iomem_free(v_rx_buf);
-#else
-    free(v_tx_buf);
-    free(v_rx_buf);
-#endif
-}
-
-void spi_receive_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
-                               size_t cmd_len, uint8_t *rx_buff, size_t rx_len)
-{ /*
- * Polled (non-DMA) receive, optionally preceded by a command.
- *
- * cmd_len and rx_len are byte counts and are divided by frame_width to get
- * frame counts.  Command frames are pushed into the data register as FIFO
- * room allows; with no command a single 0xffffffff word is written instead.
- * Received frames are then drained from the data register until rx_len
- * bytes' worth of frames have been stored in rx_buff.
- *
- * NOTE(review): with rx_len == 0, v_rx_len - 1 wraps to 0xFFFFFFFF before
- * being written to ctrlr1 - confirm callers never pass 0.
- */
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-    size_t index, fifo_len;
-    if(cmd_len == 0)
-        spi_set_tmod(spi_num, SPI_TMOD_RECV);
-    else
-        spi_set_tmod(spi_num, SPI_TMOD_EEROM);
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    uint8_t dfs_offset; /* shift applied to ctrlr0 before masking the 5-bit field below */
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            dfs_offset = 16;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            dfs_offset = 0;
-            break;
-    }
-    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
-    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
-
-    uint32_t i = 0; /* running element index into cmd_buff, later reused for rx_buff */
-    size_t v_cmd_len = cmd_len / frame_width;
-    uint32_t v_rx_len = rx_len / frame_width;
-
-    spi_handle->ctrlr1 = (uint32_t)(v_rx_len - 1); /* frame count minus one */
-    spi_handle->ssienr = 0x01;
-
-    while(v_cmd_len)
-    {
-        fifo_len = 32 - spi_handle->txflr; /* free slots, treating the TX FIFO as 32 entries deep */
-        fifo_len = fifo_len < v_cmd_len ? fifo_len : v_cmd_len;
-        switch(frame_width)
-        {
-            case SPI_TRANS_INT:
-                for(index = 0; index < fifo_len; index++)
-                    spi_handle->dr[0] = ((uint32_t *)cmd_buff)[i++];
-                break;
-            case SPI_TRANS_SHORT:
-                for(index = 0; index < fifo_len; index++)
-                    spi_handle->dr[0] = ((uint16_t *)cmd_buff)[i++];
-                break;
-            default:
-                for(index = 0; index < fifo_len; index++)
-                    spi_handle->dr[0] = cmd_buff[i++];
-                break;
-        }
-        spi_handle->ser = 1U << chip_select; /* (re)written after every batch of command frames */
-        v_cmd_len -= fifo_len;
-    }
-
-    if(cmd_len == 0)
-    {
-        spi_handle->dr[0] = 0xffffffff; /* single dummy word written when there is no command */
-        spi_handle->ser = 1U << chip_select;
-    }
-
-    i = 0;
-    while(v_rx_len)
-    {
-        fifo_len = spi_handle->rxflr; /* frames currently waiting in the RX FIFO */
-        fifo_len = fifo_len < v_rx_len ? fifo_len : v_rx_len;
-        switch(frame_width)
-        {
-            case SPI_TRANS_INT:
-                for(index = 0; index < fifo_len; index++)
-                    ((uint32_t *)rx_buff)[i++] = spi_handle->dr[0];
-                break;
-            case SPI_TRANS_SHORT:
-                for(index = 0; index < fifo_len; index++)
-                    ((uint16_t *)rx_buff)[i++] = (uint16_t)spi_handle->dr[0];
-                break;
-            default:
-                for(index = 0; index < fifo_len; index++)
-                    rx_buff[i++] = (uint8_t)spi_handle->dr[0];
-                break;
-        }
-
-        v_rx_len -= fifo_len;
-    }
-
-    spi_handle->ser = 0x00;
-    spi_handle->ssienr = 0x00;
-}
-
-void spi_receive_data_normal_dma(dmac_channel_number_t dma_send_channel_num,
-                                 dmac_channel_number_t dma_receive_channel_num,
-                                 spi_device_num_t spi_num, spi_chip_select_t chip_select, const void *cmd_buff,
-                                 size_t cmd_len, void *rx_buff, size_t rx_len)
-{ /*
- * DMA receive, optionally preceded by a DMA-transmitted command; blocks
- * until the DMA channels report completion.
- *
- * cmd_len and rx_len are used directly as 32-bit DMA transfer counts, so
- * cmd_buff and rx_buff hold one 32-bit word per frame.  The send channel is
- * only configured and waited on when cmd_len is non-zero.
- *
- * NOTE(review): rx_len == 0 makes rx_len - 1 wrap before the ctrlr1 write.
- */
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-
-    if(cmd_len == 0)
-        spi_set_tmod(spi_num, SPI_TMOD_RECV);
-    else
-        spi_set_tmod(spi_num, SPI_TMOD_EEROM);
-
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    spi_handle->ctrlr1 = (uint32_t)(rx_len - 1); /* frame count minus one */
-    spi_handle->dmacr = 0x3;
-    spi_handle->ssienr = 0x01;
-    if(cmd_len)
-        sysctl_dma_select((sysctl_dma_channel_t)dma_send_channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
-    sysctl_dma_select((sysctl_dma_channel_t)dma_receive_channel_num, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
-
-    dmac_set_single_mode(dma_receive_channel_num, (void *)(&spi_handle->dr[0]), rx_buff, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
-                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, rx_len); /* receive channel is armed first */
-    if(cmd_len)
-        dmac_set_single_mode(dma_send_channel_num, cmd_buff, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
-                             DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, cmd_len);
-    if(cmd_len == 0 && spi_get_frame_format(spi_num) == SPI_FF_STANDARD)
-        spi[spi_num]->dr[0] = 0xffffffff; /* dummy word, written only in standard frame format when there is no command */
-    spi_handle->ser = 1U << chip_select;
-    if(cmd_len)
-        dmac_wait_done(dma_send_channel_num);
-    dmac_wait_done(dma_receive_channel_num);
-
-    spi_handle->ser = 0x00;
-    spi_handle->ssienr = 0x00;
-}
-
-void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
-                                   dmac_channel_number_t dma_receive_channel_num,
-                                   spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
-                                   size_t cmd_len, uint8_t *rx_buff, size_t rx_len)
-{ /*
- * DMA receive with a byte-oriented command and result buffer.
- *
- * cmd_len and rx_len are byte counts.  One scratch allocation holds the
- * command, widened to one uint32_t per frame, immediately followed by the
- * receive area (read_buf).  spi_receive_data_normal_dma() performs the
- * transfer in frame units, after which each received word is narrowed to
- * the frame width and stored in rx_buff.
- *
- * NOTE(review): allocation results are not checked before use.
- */
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    uint8_t dfs_offset; /* shift applied to ctrlr0 before masking the 5-bit field below */
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            dfs_offset = 16;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            dfs_offset = 0;
-            break;
-    }
-    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
-    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
-
-    size_t i;
-
-    uint32_t *write_cmd; /* start of the scratch allocation: widened command frames */
-    uint32_t *read_buf; /* receive area, located right after the command frames */
-    size_t v_recv_len; /* frames to receive */
-    size_t v_cmd_len; /* command frames to send */
-    switch(frame_width)
-    {
-        case SPI_TRANS_INT:
-#if FIX_CACHE
-            write_cmd = iomem_malloc(cmd_len + rx_len);
-#else
-            write_cmd = malloc(cmd_len + rx_len);
-#endif
-            for(i = 0; i < cmd_len / 4; i++)
-                write_cmd[i] = ((uint32_t *)cmd_buff)[i]; /* NOTE(review): assumes cmd_buff is 4-byte aligned - confirm */
-            read_buf = &write_cmd[i];
-            v_recv_len = rx_len / 4;
-            v_cmd_len = cmd_len / 4;
-            break;
-        case SPI_TRANS_SHORT:
-#if FIX_CACHE
-            write_cmd = iomem_malloc((cmd_len + rx_len) / 2 * sizeof(uint32_t));
-#else
-            write_cmd = malloc((cmd_len + rx_len) / 2 * sizeof(uint32_t));
-#endif
-            for(i = 0; i < cmd_len / 2; i++)
-                write_cmd[i] = ((uint16_t *)cmd_buff)[i];
-            read_buf = &write_cmd[i];
-            v_recv_len = rx_len / 2;
-            v_cmd_len = cmd_len / 2;
-            break;
-        default:
-#if FIX_CACHE
-            write_cmd = iomem_malloc((cmd_len + rx_len) * sizeof(uint32_t));
-#else
-            write_cmd = malloc((cmd_len + rx_len) * sizeof(uint32_t));
-#endif
-            for(i = 0; i < cmd_len; i++)
-                write_cmd[i] = cmd_buff[i];
-            read_buf = &write_cmd[i];
-            v_recv_len = rx_len;
-            v_cmd_len = cmd_len;
-            break;
-    }
-
-    spi_receive_data_normal_dma(dma_send_channel_num, dma_receive_channel_num, spi_num, chip_select, write_cmd, v_cmd_len, read_buf, v_recv_len);
-
-    switch(frame_width)
-    { /* narrow each received 32-bit word to the frame width */
-        case SPI_TRANS_INT:
-            for(i = 0; i < v_recv_len; i++)
-                ((uint32_t *)rx_buff)[i] = read_buf[i];
-            break;
-        case SPI_TRANS_SHORT:
-            for(i = 0; i < v_recv_len; i++)
-                ((uint16_t *)rx_buff)[i] = read_buf[i];
-            break;
-        default:
-            for(i = 0; i < v_recv_len; i++)
-                rx_buff[i] = read_buf[i];
-            break;
-    }
-#if FIX_CACHE
-    iomem_free(write_cmd); /* read_buf lives inside this allocation, so one free releases both */
-#else
-    free(write_cmd);
-#endif
-}
-
-void spi_receive_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
-                               size_t cmd_len, uint8_t *rx_buff, size_t rx_len)
-{ /*
- * Polled (non-DMA) receive whose command is given as 32-bit words.
- *
- * cmd_len counts uint32_t words in cmd_buff; rx_len is a byte count and is
- * divided by frame_width to get the number of frames to read.  Unlike
- * spi_receive_data_standard(), no dummy word is written when cmd_len is 0;
- * only the chip select is asserted.
- *
- * NOTE(review): rx_len == 0 makes v_rx_len - 1 wrap before the ctrlr1 write.
- */
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-
-    size_t index, fifo_len;
-    if(cmd_len == 0)
-        spi_set_tmod(spi_num, SPI_TMOD_RECV);
-    else
-        spi_set_tmod(spi_num, SPI_TMOD_EEROM);
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    uint8_t dfs_offset; /* shift applied to ctrlr0 before masking the 5-bit field below */
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            dfs_offset = 16;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            dfs_offset = 0;
-            break;
-    }
-    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
-    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
-
-    uint32_t v_cmd_len = cmd_len; /* command words still to be written */
-    uint32_t i = 0; /* element index into rx_buff */
-
-    uint32_t v_rx_len = rx_len / frame_width;
-
-    spi_handle->ctrlr1 = (uint32_t)(v_rx_len - 1); /* frame count minus one */
-    spi_handle->ssienr = 0x01;
-
-    while(v_cmd_len)
-    {
-        fifo_len = 32 - spi_handle->txflr; /* free slots, treating the TX FIFO as 32 entries deep */
-        fifo_len = fifo_len < v_cmd_len ? fifo_len : v_cmd_len;
-
-        for(index = 0; index < fifo_len; index++)
-            spi_handle->dr[0] = *cmd_buff++;
-
-        spi_handle->ser = 1U << chip_select; /* (re)written after every batch of command words */
-        v_cmd_len -= fifo_len;
-    }
-
-    if(cmd_len == 0)
-    {
-        spi_handle->ser = 1U << chip_select;
-    }
-
-    while(v_rx_len)
-    {
-        fifo_len = spi_handle->rxflr; /* frames currently waiting in the RX FIFO */
-        fifo_len = fifo_len < v_rx_len ? fifo_len : v_rx_len;
-        switch(frame_width)
-        {
-            case SPI_TRANS_INT:
-                for(index = 0; index < fifo_len; index++)
-                    ((uint32_t *)rx_buff)[i++] = spi_handle->dr[0];
-                break;
-            case SPI_TRANS_SHORT:
-                for(index = 0; index < fifo_len; index++)
-                    ((uint16_t *)rx_buff)[i++] = (uint16_t)spi_handle->dr[0];
-                break;
-            default:
-                for(index = 0; index < fifo_len; index++)
-                    rx_buff[i++] = (uint8_t)spi_handle->dr[0];
-                break;
-        }
-
-        v_rx_len -= fifo_len;
-    }
-
-    spi_handle->ser = 0x00;
-    spi_handle->ssienr = 0x00;
-}
-
-/**
- * DMA receive whose command is supplied as 32-bit words.
- *
- * For 32-bit frames the caller's buffers are passed straight to
- * spi_receive_data_normal_dma().  For narrower frames a scratch buffer is
- * allocated that holds the cmd_len command words followed by one uint32_t
- * slot per frame to receive; after the transfer every received word is
- * narrowed to the frame width and stored in rx_buff.
- *
- * @param dma_send_channel_num    DMA channel used for the command.
- * @param dma_receive_channel_num DMA channel used for the received frames.
- * @param spi_num                 SPI device index; below SPI_DEVICE_MAX, not 2.
- * @param chip_select             Chip-select line to assert.
- * @param cmd_buff                Command words.
- * @param cmd_len                 Number of uint32_t words in cmd_buff.
- * @param rx_buff                 Destination for received data.
- * @param rx_len                  Number of bytes to receive.
- */
-void spi_receive_data_multiple_dma(dmac_channel_number_t dma_send_channel_num,
-                                   dmac_channel_number_t dma_receive_channel_num,
-                                   spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
-                                   size_t cmd_len, uint8_t *rx_buff, size_t rx_len)
-{
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    /* Shift applied to ctrlr0 before masking the 5-bit frame-size field. */
-    uint8_t dfs_offset;
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            dfs_offset = 16;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            dfs_offset = 0;
-            break;
-    }
-    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
-    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
-
-    size_t i;
-
-    uint32_t *write_cmd = NULL;
-    /* Only used on the scratch-buffer paths; initialised so the 32-bit path
-     * never carries an indeterminate pointer. */
-    uint32_t *read_buf = NULL;
-    size_t v_recv_len;
-    switch(frame_width)
-    {
-        case SPI_TRANS_INT:
-            v_recv_len = rx_len / 4;
-            break;
-        case SPI_TRANS_SHORT:
-            /* cmd_len counts 32-bit words, so it must be scaled by
-             * sizeof(uint32_t) together with the receive slots; sizing it
-             * in bytes overflowed the allocation. */
-#if FIX_CACHE
-            write_cmd = iomem_malloc((cmd_len + rx_len / 2) * sizeof(uint32_t));
-#else
-            write_cmd = malloc((cmd_len + rx_len / 2) * sizeof(uint32_t));
-#endif
-            for(i = 0; i < cmd_len; i++)
-                write_cmd[i] = cmd_buff[i];
-            read_buf = &write_cmd[i];
-            v_recv_len = rx_len / 2;
-            break;
-        default:
-            /* Same sizing rule: command words plus one slot per byte frame. */
-#if FIX_CACHE
-            write_cmd = iomem_malloc((cmd_len + rx_len) * sizeof(uint32_t));
-#else
-            write_cmd = malloc((cmd_len + rx_len) * sizeof(uint32_t));
-#endif
-            for(i = 0; i < cmd_len; i++)
-                write_cmd[i] = cmd_buff[i];
-            read_buf = &write_cmd[i];
-            v_recv_len = rx_len;
-            break;
-    }
-    if(frame_width == SPI_TRANS_INT)
-        spi_receive_data_normal_dma(dma_send_channel_num, dma_receive_channel_num, spi_num, chip_select, cmd_buff, cmd_len, rx_buff, v_recv_len);
-    else
-        spi_receive_data_normal_dma(dma_send_channel_num, dma_receive_channel_num, spi_num, chip_select, write_cmd, cmd_len, read_buf, v_recv_len);
-
-    /* Narrow each received 32-bit word to the frame width. */
-    switch(frame_width)
-    {
-        case SPI_TRANS_INT:
-            break;
-        case SPI_TRANS_SHORT:
-            for(i = 0; i < v_recv_len; i++)
-                ((uint16_t *)rx_buff)[i] = read_buf[i];
-            break;
-        default:
-            for(i = 0; i < v_recv_len; i++)
-                rx_buff[i] = read_buf[i];
-            break;
-    }
-
-    /* The scratch buffer exists only on the narrow-frame paths. */
-    if(frame_width != SPI_TRANS_INT)
-    {
-#if FIX_CACHE
-        iomem_free(write_cmd);
-#else
-        free(write_cmd);
-#endif
-    }
-}
-
-void spi_send_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
-                            size_t cmd_len, const uint8_t *tx_buff, size_t tx_len)
-{ /*
- * Polled (non-DMA) send of a word-oriented command followed by a payload.
- *
- * cmd_len counts uint32_t words in cmd_buff.  The command words are written
- * to the data register here; the payload is then handed to
- * spi_send_data_normal(), which is also where the chip select and the
- * controller enable are cleared again.
- *
- * NOTE(review): v_cmd_len is a byte count but is compared with the number of
- * free FIFO slots, so each pass writes at most a quarter of the free slots -
- * confirm whether that is intentional.
- */
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-
-    size_t index, fifo_len;
-    spi_set_tmod(spi_num, SPI_TMOD_TRANS);
-    volatile spi_t *spi_handle = spi[spi_num];
-    spi_handle->ssienr = 0x01;
-    spi_handle->ser = 1U << chip_select;
-
-    size_t v_cmd_len = cmd_len * 4; /* command size in bytes */
-    while(v_cmd_len)
-    {
-        fifo_len = 32 - spi_handle->txflr; /* free slots, treating the TX FIFO as 32 entries deep */
-        fifo_len = fifo_len < v_cmd_len ? fifo_len : v_cmd_len;
-        fifo_len = fifo_len / 4 * 4; /* round down to whole 32-bit words */
-        for(index = 0; index < fifo_len / 4; index++)
-            spi_handle->dr[0] = *cmd_buff++;
-        v_cmd_len -= fifo_len;
-    }
-    spi_send_data_normal(spi_num, chip_select, tx_buff, tx_len);
-}
-
-void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
-                                spi_chip_select_t chip_select,
-                                const uint32_t *cmd_buff, size_t cmd_len, const uint8_t *tx_buff, size_t tx_len)
-{ /*
- * DMA send of a word-oriented command followed by a byte-oriented payload.
- *
- * cmd_len counts uint32_t words in cmd_buff; tx_len is a byte count.  Both
- * are packed into a temporary uint32_t array (command words first, then one
- * element per payload frame) which is passed to spi_send_data_normal_dma()
- * with SPI_TRANS_INT so it is used as-is.
- *
- * NOTE(review): allocation results are not checked, and the loop index is a
- * signed int compared against size_t.
- */
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    uint8_t dfs_offset; /* shift applied to ctrlr0 before masking the 5-bit field below */
-    switch(spi_num)
-    {
-        case 0:
-        case 1:
-            dfs_offset = 16;
-            break;
-        case 2:
-            configASSERT(!"Spi Bus 2 Not Support!");
-            break;
-        case 3:
-        default:
-            dfs_offset = 0;
-            break;
-    }
-    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
-    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
-
-    uint32_t *buf;
-    size_t v_send_len; /* number of frames (uint32_t elements) in buf */
-    int i;
-    switch(frame_width)
-    {
-        case SPI_TRANS_INT:
-#if FIX_CACHE
-            buf = iomem_malloc(cmd_len * sizeof(uint32_t) + tx_len);
-#else
-            buf = malloc(cmd_len * sizeof(uint32_t) + tx_len);
-#endif
-            for(i = 0; i < cmd_len; i++)
-                buf[i] = cmd_buff[i];
-            for(i = 0; i < tx_len / 4; i++)
-                buf[cmd_len + i] = ((uint32_t *)tx_buff)[i]; /* NOTE(review): assumes tx_buff is 4-byte aligned - confirm */
-            v_send_len = cmd_len + tx_len / 4;
-            break;
-        case SPI_TRANS_SHORT:
-#if FIX_CACHE
-            buf = iomem_malloc(cmd_len * sizeof(uint32_t) + tx_len / 2 * sizeof(uint32_t));
-#else
-            buf = malloc(cmd_len * sizeof(uint32_t) + tx_len / 2 * sizeof(uint32_t));
-#endif
-            for(i = 0; i < cmd_len; i++)
-                buf[i] = cmd_buff[i];
-            for(i = 0; i < tx_len / 2; i++)
-                buf[cmd_len + i] = ((uint16_t *)tx_buff)[i]; /* widen each 16-bit frame to 32 bits */
-            v_send_len = cmd_len + tx_len / 2;
-            break;
-        default:
-#if FIX_CACHE
-            buf = iomem_malloc((cmd_len + tx_len) * sizeof(uint32_t));
-#else
-            buf = malloc((cmd_len + tx_len) * sizeof(uint32_t));
-#endif
-            for(i = 0; i < cmd_len; i++)
-                buf[i] = cmd_buff[i];
-            for(i = 0; i < tx_len; i++)
-                buf[cmd_len + i] = tx_buff[i]; /* widen each byte frame to 32 bits */
-            v_send_len = cmd_len + tx_len;
-            break;
-    }
-
-    spi_send_data_normal_dma(channel_num, spi_num, chip_select, buf, v_send_len, SPI_TRANS_INT); /* SPI_TRANS_INT: callee uses buf directly, no second copy */
-
-#if FIX_CACHE
-    iomem_free((void *)buf);
-#else
-    free((void *)buf);
-#endif
-}
-
-void spi_fill_data_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num, spi_chip_select_t chip_select,
-                       const uint32_t *tx_buff, size_t tx_len)
-{ /*
- * DMA transmit of tx_len 32-bit transfers in which neither the source nor
- * the destination address is incremented, so the word at tx_buff is the
- * source of every transfer.  Blocks until the DMA channel is done and the
- * status register reaches the expected idle pattern.
- */
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-
-    spi_set_tmod(spi_num, SPI_TMOD_TRANS);
-    volatile spi_t *spi_handle = spi[spi_num];
-    spi_handle->dmacr = 0x2; /*enable dma transmit*/
-    spi_handle->ssienr = 0x01;
-
-    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2); /* request id is offset by 2 per SPI device */
-    dmac_set_single_mode(channel_num, tx_buff, (void *)(&spi_handle->dr[0]), DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
-                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, tx_len); /* both addresses fixed: same source word each time */
-    spi_handle->ser = 1U << chip_select;
-    dmac_wait_done(channel_num);
-
-    while((spi_handle->sr & 0x05) != 0x04) /* spin until sr bit 2 is set and bit 0 is clear - presumably TX FIFO empty and not busy, confirm against the datasheet */
-        ;
-    spi_handle->ser = 0x00;
-    spi_handle->ssienr = 0x00;
-}
-
-static void spi_slave_idle_mode(void)
-{ /*
- * Puts the slave controller (spi[2]) back into its IDLE state: optionally
- * re-routes the MOSI pin, rewrites ctrlr0 with the controller disabled,
- * turns DMA off, re-enables the controller and drives the ready pin high.
- */
-    if(g_instance.is_dual)
-    {
-        fpioa_set_function(g_instance.mosi_pin, FUNC_SPI_SLAVE_D0);
-    }
-    volatile spi_t *spi_handle = spi[2];
-    g_instance.status = IDLE;
-    spi_handle->ssienr = 0x00; /* ctrlr0 is rewritten while the controller is disabled */
-    spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs); /* work_mode, slv_oe and dfs are used as bit positions */
-    spi_handle->dmacr = 0x00;
-    spi_handle->ssienr = 0x01;
-    gpiohs_set_pin(g_instance.ready_pin, GPIO_PV_HIGH);
-}
-
-/**
- * Reads and executes one 8-byte command received by the slave controller.
- *
- * Command layout as decoded below:
- *   byte 0      command code
- *   bytes 1..4  address / offset, least-significant byte first
- *   bytes 5..6  length, least-significant byte first (0 means 65536)
- *   byte 7      checksum: 8-bit sum of bytes 0..6
- *
- * A bad checksum, an unknown command, or a length above 8 for commands below
- * WRITE_DATA_BLOCK returns the controller to idle.  Otherwise the controller
- * is reconfigured for the requested transfer, the status becomes TRANSFER,
- * the ready pin is driven low and the DMA channel is waited on.
- */
-static void spi_slave_command_mode(void)
-{
-    volatile spi_t *spi_handle = spi[2];
-    uint8_t cmd_data[8], sum = 0;
-
-    spi_transfer_width_t frame_width = spi_get_frame_size(g_instance.data_bit_length - 1);
-    spi_device_num_t spi_num = SPI_DEVICE_2;
-    /* Pull the 8 command bytes out of the data register, one frame at a time. */
-    switch(frame_width)
-    {
-        case SPI_TRANS_INT:
-            for(uint32_t i = 0; i < 8 / 4; i++)
-                ((uint32_t *)cmd_data)[i] = spi_handle->dr[0];
-            break;
-        case SPI_TRANS_SHORT:
-            for(uint32_t i = 0; i < 8 / 2; i++)
-                ((uint16_t *)cmd_data)[i] = spi_handle->dr[0];
-            break;
-        default:
-            for(uint32_t i = 0; i < 8; i++)
-                cmd_data[i] = spi_handle->dr[0];
-            break;
-    }
-
-    /* sum is a uint8_t, so the checksum is taken modulo 256. */
-    for(uint32_t i = 0; i < 7; i++)
-    {
-        sum += cmd_data[i];
-    }
-    if(cmd_data[7] != sum)
-    {
-        spi_slave_idle_mode();
-        return;
-    }
-    g_instance.command.cmd = cmd_data[0];
-    /* Widen before shifting: a uint8_t promotes to int, and shifting a byte
-     * >= 0x80 left by 24 would overflow the signed int. */
-    g_instance.command.addr = (uint32_t)cmd_data[1] | ((uint32_t)cmd_data[2] << 8) | ((uint32_t)cmd_data[3] << 16) | ((uint32_t)cmd_data[4] << 24);
-
-    g_instance.command.len = cmd_data[5] | (cmd_data[6] << 8);
-    if(g_instance.command.len == 0)
-        g_instance.command.len = 65536;
-    if((g_instance.command.cmd < WRITE_DATA_BLOCK) && (g_instance.command.len > 8))
-    {
-        spi_slave_idle_mode();
-        return;
-    }
-    g_instance.status = TRANSFER;
-    /* The controller is disabled while ctrlr0 is rewritten. */
-    spi_handle->ssienr = 0x00;
-    if(g_instance.command.cmd == WRITE_CONFIG)
-    {
-        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs);
-        spi_handle->ssienr = 0x01;
-    } else if(g_instance.command.cmd == READ_CONFIG)
-    {
-        if(g_instance.is_dual)
-        {
-            fpioa_set_function(g_instance.miso_pin, FUNC_SPI_SLAVE_D0);
-        }
-        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x0 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs);
-        spi_set_tmod(2, SPI_TMOD_TRANS);
-        spi_handle->ssienr = 0x01;
-        /* Queue the requested slice of the config area, addr being an
-         * offset into config_ptr. */
-        switch(frame_width)
-        {
-            case SPI_TRANS_INT:
-                for(uint32_t i = 0; i < g_instance.command.len / 4; i++)
-                {
-                    spi_handle->dr[0] = ((uint32_t *)&g_instance.config_ptr[g_instance.command.addr])[i];
-                }
-                break;
-            case SPI_TRANS_SHORT:
-                for(uint32_t i = 0; i < g_instance.command.len / 2; i++)
-                {
-                    spi_handle->dr[0] = ((uint16_t *)&g_instance.config_ptr[g_instance.command.addr])[i];
-                }
-                break;
-            default:
-                for(uint32_t i = 0; i < g_instance.command.len; i++)
-                {
-                    spi_handle->dr[0] = ((uint8_t *)&g_instance.config_ptr[g_instance.command.addr])[i];
-                }
-                break;
-        }
-    } else if(g_instance.command.cmd == WRITE_DATA_BYTE)
-    {
-        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs);
-        spi_handle->ssienr = 0x01;
-    } else if(g_instance.command.cmd == READ_DATA_BYTE)
-    {
-        if(g_instance.is_dual)
-        {
-            fpioa_set_function(g_instance.miso_pin, FUNC_SPI_SLAVE_D0);
-        }
-        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x0 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs);
-        spi_set_tmod(2, SPI_TMOD_TRANS);
-        spi_handle->ssienr = 0x01;
-        /* Here addr is used as a raw memory address, not a config offset. */
-        switch(frame_width)
-        {
-            case SPI_TRANS_INT:
-                for(uint32_t i = 0; i < g_instance.command.len / 4; i++)
-                {
-                    spi_handle->dr[0] = ((uint32_t *)(uintptr_t)g_instance.command.addr)[i];
-                }
-                break;
-            case SPI_TRANS_SHORT:
-                for(uint32_t i = 0; i < g_instance.command.len / 2; i++)
-                {
-                    spi_handle->dr[0] = ((uint16_t *)(uintptr_t)g_instance.command.addr)[i];
-                }
-                break;
-            default:
-                for(uint32_t i = 0; i < g_instance.command.len; i++)
-                {
-                    spi_handle->dr[0] = ((uint8_t *)(uintptr_t)g_instance.command.addr)[i];
-                }
-                break;
-        }
-    } else if(g_instance.command.cmd == WRITE_DATA_BLOCK)
-    {
-        /* Block transfers always use 32-bit frames and DMA; the address is
-         * rounded down to a 16-byte boundary and len * 4 words are moved. */
-        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((32 - 1) << g_instance.dfs);
-
-        spi_handle->dmacr = 0x01;
-        spi_handle->ssienr = 0x01;
-
-        sysctl_dma_select(g_instance.dmac_channel, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
-
-        dmac_set_single_mode(g_instance.dmac_channel, (void *)(&spi_handle->dr[0]), (void *)((uintptr_t)g_instance.command.addr & 0xFFFFFFF0), DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
-                             DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, g_instance.command.len * 4);
-    } else if(g_instance.command.cmd == READ_DATA_BLOCK)
-    {
-        if(g_instance.is_dual)
-        {
-            fpioa_set_function(g_instance.miso_pin, FUNC_SPI_SLAVE_D0);
-        }
-        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x0 << g_instance.slv_oe) | ((32 - 1) << g_instance.dfs);
-        spi_set_tmod(2, SPI_TMOD_TRANS);
-        spi_handle->dmacr = 0x02;
-        spi_handle->ssienr = 0x01;
-
-        sysctl_dma_select(g_instance.dmac_channel, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
-        dmac_set_single_mode(g_instance.dmac_channel, (void *)((uintptr_t)g_instance.command.addr & 0xFFFFFFF0), (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
-                             DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, g_instance.command.len * 4);
-    } else
-    {
-        spi_slave_idle_mode();
-        return;
-    }
-    gpiohs_set_pin(g_instance.ready_pin, GPIO_PV_LOW);
-    /* NOTE(review): this wait also runs for the non-DMA commands above -
-     * confirm dmac_wait_done() returns promptly when no transfer was armed. */
-    dmac_wait_done(g_instance.dmac_channel);
-}
-
-static void spi_slave_transfer_mode(void)
-{
-    spi_transfer_width_t frame_width = spi_get_frame_size(g_instance.data_bit_length - 1);
-    uint32_t command_len = 0;
-
-    switch(frame_width)
-    {
-        case SPI_TRANS_INT:
-            command_len = g_instance.command.len / 4;
-            break;
-        case SPI_TRANS_SHORT:
-            command_len = g_instance.command.len / 2;
-            break;
-        default:
-            command_len = g_instance.command.len;
-            break;
-    }
-    volatile spi_t *spi_handle = spi[2];
-    g_instance.command.err = 0;
-    if(g_instance.command.cmd == WRITE_CONFIG || g_instance.command.cmd == WRITE_DATA_BYTE)
-    {
-        if(spi_handle->rxflr < command_len - 1)
-            g_instance.command.err = 1;
-    } else if(g_instance.command.cmd == READ_CONFIG || g_instance.command.cmd == READ_DATA_BYTE)
-    {
-        if(spi_handle->txflr != 0)
-            g_instance.command.err = 2;
-    } else
-    {
-        spi_slave_idle_mode();
-        return;
-    }
-
-    if(g_instance.command.err == 0)
-    {
-        if(g_instance.command.cmd == WRITE_CONFIG)
-        {
-            switch(frame_width)
-            {
-                case SPI_TRANS_INT:
-                    for(uint32_t i = 0; i < command_len; i++)
-                    {
-                        ((uint32_t *)&g_instance.config_ptr[g_instance.command.addr])[i] = spi_handle->dr[0];
-                    }
-                    break;
-                case SPI_TRANS_SHORT:
-                    for(uint32_t i = 0; i < command_len; i++)
-                    {
-                        ((uint16_t *)&g_instance.config_ptr[g_instance.command.addr])[i] = spi_handle->dr[0];
-                    }
-                    break;
-                default:
-                    for(uint32_t i = 0; i < command_len; i++)
-                    {
-                        ((uint8_t *)&g_instance.config_ptr[g_instance.command.addr])[i] = spi_handle->dr[0];
-                    }
-                    break;
-            }
-        } else if(g_instance.command.cmd == WRITE_DATA_BYTE)
-        {
-            switch(frame_width)
-            {
-                case SPI_TRANS_INT:
-                    for(uint32_t i = 0; i < command_len; i++)
-                    {
-                        ((uint32_t *)(uintptr_t)g_instance.command.addr)[i] = spi_handle->dr[0];
-                    }
-                    break;
-                case SPI_TRANS_SHORT:
-                    for(uint32_t i = 0; i < command_len; i++)
-                    {
-                        ((uint16_t *)(uintptr_t)g_instance.command.addr)[i] = spi_handle->dr[0];
-                    }
-                    break;
-                default:
-                    for(uint32_t i = 0; i < command_len; i++)
-                    {
-                        ((uint8_t *)(uintptr_t)g_instance.command.addr)[i] = spi_handle->dr[0];
-                    }
-                    break;
-            }
-        }
-    }
-    if(g_instance.callback != NULL)
-    {
-        g_instance.callback((void *)&g_instance.command);
-    }
-    spi_slave_idle_mode();
-}
-
-static void spi_slave_cs_irq(void)
-{
-    volatile spi_t *spi_handle = spi[2];
-    if (g_instance.status == IDLE && spi_handle->rxflr == 8)
-		g_instance.status = COMMAND;
-    if(g_instance.status == IDLE)
-        spi_slave_idle_mode();
-    else if(g_instance.status == COMMAND)
-        spi_slave_command_mode();
-    else if(g_instance.status == TRANSFER)
-        spi_slave_transfer_mode();
-}
-
-void spi_slave_config(uint8_t int_pin, uint8_t ready_pin, dmac_channel_number_t dmac_channel, size_t data_bit_length, uint8_t *data, uint32_t len, spi_slave_receive_callback_t callback)
-{
-    g_instance.status = IDLE;
-    g_instance.config_ptr = data;
-    g_instance.config_len = len;
-    g_instance.work_mode = 6;
-    g_instance.slv_oe = 10;
-    g_instance.dfs = 16;
-    g_instance.data_bit_length = data_bit_length;
-    g_instance.ready_pin = ready_pin;
-    g_instance.int_pin = int_pin;
-    g_instance.callback = callback;
-    g_instance.dmac_channel = dmac_channel;
-    g_instance.is_dual = 0;
-    sysctl_reset(SYSCTL_RESET_SPI2);
-    sysctl_clock_enable(SYSCTL_CLOCK_SPI2);
-    sysctl_clock_set_threshold(SYSCTL_THRESHOLD_SPI2, 0);
-
-    uint32_t data_width = data_bit_length / 8;
-    volatile spi_t *spi_handle = spi[2];
-    spi_handle->ssienr = 0x00;
-    spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((data_bit_length - 1) << g_instance.dfs);
-    spi_handle->dmatdlr = 0x04;
-    spi_handle->dmardlr = 0x03;
-    spi_handle->dmacr = 0x00;
-    spi_handle->txftlr = 0x00;
-    spi_handle->rxftlr = 0x08 / data_width - 1;
-    spi_handle->imr = 0x00;
-    spi_handle->ssienr = 0x01;
-
-    gpiohs_set_drive_mode(g_instance.ready_pin, GPIO_DM_OUTPUT);
-    gpiohs_set_pin(g_instance.ready_pin, GPIO_PV_HIGH);
-
-    gpiohs_set_drive_mode(g_instance.int_pin, GPIO_DM_INPUT_PULL_UP);
-    gpiohs_set_pin_edge(g_instance.int_pin, GPIO_PE_RISING);
-    gpiohs_set_irq(g_instance.int_pin, 3, spi_slave_cs_irq);
-}
-
-void spi_slave_dual_config(uint8_t int_pin,
-                           uint8_t ready_pin,
-                           uint8_t mosi_pin,
-                           uint8_t miso_pin,
-                           dmac_channel_number_t dmac_channel,
-                           size_t data_bit_length,
-                           uint8_t *data,
-                           uint32_t len,
-                           spi_slave_receive_callback_t callback)
-{
-    spi_slave_config(int_pin, ready_pin, dmac_channel, data_bit_length, data, len, callback);
-    g_instance.is_dual = 1;
-    g_instance.mosi_pin = mosi_pin;
-    g_instance.miso_pin = miso_pin;
-}
-void spi_handle_data_dma(spi_device_num_t spi_num, spi_chip_select_t chip_select, spi_data_t data, plic_interrupt_t *cb)
-{
-    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
-    configASSERT(chip_select < SPI_CHIP_SELECT_MAX);
-    switch(data.transfer_mode)
-    {
-        case SPI_TMOD_TRANS_RECV:
-        case SPI_TMOD_EEROM:
-            configASSERT(data.tx_buf && data.tx_len && data.rx_buf && data.rx_len);
-            break;
-        case SPI_TMOD_TRANS:
-            configASSERT(data.tx_buf && data.tx_len);
-            break;
-        case SPI_TMOD_RECV:
-            configASSERT(data.rx_buf && data.rx_len);
-            break;
-        default:
-            configASSERT(!"Transfer Mode ERR");
-            break;
-    }
-    configASSERT(data.tx_channel < DMAC_CHANNEL_MAX && data.rx_channel < DMAC_CHANNEL_MAX);
-    volatile spi_t *spi_handle = spi[spi_num];
-
-    spinlock_lock(&g_spi_instance[spi_num].lock);
-    if(cb)
-    {
-        g_spi_instance[spi_num].spi_int_instance.callback = cb->callback;
-        g_spi_instance[spi_num].spi_int_instance.ctx = cb->ctx;
-    }
-    switch(data.transfer_mode)
-    {
-        case SPI_TMOD_RECV:
-            spi_set_tmod(spi_num, SPI_TMOD_RECV);
-            if(data.rx_len > 65536)
-                data.rx_len = 65536;
-            spi_handle->ctrlr1 = (uint32_t)(data.rx_len - 1);
-            spi_handle->dmacr = 0x03;
-            spi_handle->ssienr = 0x01;
-            if(spi_get_frame_format(spi_num) == SPI_FF_STANDARD)
-                spi_handle->dr[0] = 0xffffffff;
-            if(cb)
-            {
-                dmac_irq_register(data.rx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
-                g_spi_instance[spi_num].dmac_channel = data.rx_channel;
-            }
-            sysctl_dma_select((sysctl_dma_channel_t)data.rx_channel, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
-            dmac_set_single_mode(data.rx_channel, (void *)(&spi_handle->dr[0]), (void *)data.rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
-                                 DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.rx_len);
-            spi_handle->ser = 1U << chip_select;
-            if(!cb)
-                dmac_wait_done(data.rx_channel);
-            break;
-        case SPI_TMOD_TRANS:
-            spi_set_tmod(spi_num, SPI_TMOD_TRANS);
-            spi_handle->dmacr = 0x2; /*enable dma transmit*/
-            spi_handle->ssienr = 0x01;
-
-            if(cb)
-            {
-                dmac_irq_register(data.tx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
-                g_spi_instance[spi_num].dmac_channel = data.tx_channel;
-            }
-            sysctl_dma_select(data.tx_channel, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
-            if(data.fill_mode)
-                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
-                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
-            else
-                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
-                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
-            spi_handle->ser = 1U << chip_select;
-            if(!cb)
-            {
-                dmac_wait_done(data.tx_channel);
-                while((spi_handle->sr & 0x05) != 0x04)
-                    ;
-            }
-            break;
-        case SPI_TMOD_EEROM:
-            spi_set_tmod(spi_num, SPI_TMOD_EEROM);
-            if(data.rx_len > 65536)
-                data.rx_len = 65536;
-            spi_handle->ctrlr1 = (uint32_t)(data.rx_len - 1);
-            spi_handle->dmacr = 0x3;
-            spi_handle->ssienr = 0x01;
-            sysctl_dma_select(data.tx_channel, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
-            if(data.fill_mode)
-                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
-                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
-            else
-                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
-                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
-            if(cb)
-            {
-                dmac_irq_register(data.rx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
-                g_spi_instance[spi_num].dmac_channel = data.rx_channel;
-            }
-            sysctl_dma_select(data.rx_channel, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
-            dmac_set_single_mode(data.rx_channel, (void *)(&spi_handle->dr[0]), (void *)data.rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
-                                 DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.rx_len);
-            spi_handle->ser = 1U << chip_select;
-            if(!cb)
-            {
-                dmac_wait_done(data.tx_channel);
-                dmac_wait_done(data.rx_channel);
-            }
-
-            break;
-        case SPI_TMOD_TRANS_RECV:
-            spi_set_tmod(spi_num, SPI_TMOD_TRANS_RECV);
-            if(data.rx_len > 65536)
-                data.rx_len = 65536;
-
-            if(cb)
-            {
-                if(data.tx_len > data.rx_len)
-                {
-                    dmac_irq_register(data.tx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
-                    g_spi_instance[spi_num].dmac_channel = data.tx_channel;
-                } else
-                {
-                    dmac_irq_register(data.rx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
-                    g_spi_instance[spi_num].dmac_channel = data.rx_channel;
-                }
-            }
-            spi_handle->ctrlr1 = (uint32_t)(data.rx_len - 1);
-            spi_handle->dmacr = 0x3;
-            spi_handle->ssienr = 0x01;
-            sysctl_dma_select(data.tx_channel, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
-            if(data.fill_mode)
-                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
-                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
-            else
-                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
-                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
-            sysctl_dma_select(data.rx_channel, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
-            dmac_set_single_mode(data.rx_channel, (void *)(&spi_handle->dr[0]), (void *)data.rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
-                                 DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.rx_len);
-            spi_handle->ser = 1U << chip_select;
-            if(!cb)
-            {
-                dmac_wait_done(data.tx_channel);
-                dmac_wait_done(data.rx_channel);
-            }
-            break;
-    }
-    if(!cb)
-    {
-        spinlock_unlock(&g_spi_instance[spi_num].lock);
-        spi_handle->ser = 0x00;
-        spi_handle->ssienr = 0x00;
-    }
-}
+/* Copyright 2018 Canaan Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <bsp.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fpioa.h"
+#include "gpiohs.h"
+#include "platform.h"
+#include "spi.h"
+#include "sysctl.h"
+#include "utils.h"
+#include "iomem.h"
+
+volatile spi_t *const spi[4] =
+    {
+        (volatile spi_t *)SPI0_BASE_ADDR,
+        (volatile spi_t *)SPI1_BASE_ADDR,
+        (volatile spi_t *)SPI_SLAVE_BASE_ADDR,
+        (volatile spi_t *)SPI3_BASE_ADDR};
+
+typedef struct _spi_dma_context
+{
+    uint8_t *buffer;
+    size_t buf_len;
+    uint32_t *malloc_buffer;
+    spi_transfer_mode_t int_mode;
+    dmac_channel_number_t dmac_channel;
+    spi_device_num_t spi_num;
+    plic_instance_t spi_int_instance;
+} spi_dma_context_t;
+
+spi_dma_context_t spi_dma_context[4];
+
+typedef struct _spi_instance_t
+{
+    spi_device_num_t spi_num;
+    spi_transfer_mode_t transfer_mode;
+    dmac_channel_number_t dmac_channel;
+    plic_instance_t spi_int_instance;
+    spinlock_t lock;
+} spi_instance_t;
+
+static spi_instance_t g_spi_instance[4];
+
+static spi_slave_instance_t g_instance;
+
+static spi_frame_format_t spi_get_frame_format(spi_device_num_t spi_num)
+{
+    uint8_t frf_offset;
+    switch(spi_num)
+    {
+        case 0:
+        case 1:
+            frf_offset = 21;
+            break;
+        case 2:
+            configASSERT(!"Spi Bus 2 Not Support!");
+            break;
+        case 3:
+        default:
+            frf_offset = 22;
+            break;
+    }
+    volatile spi_t *spi_adapter = spi[spi_num];
+    return ((spi_adapter->ctrlr0 >> frf_offset) & 0x3);
+}
+
+static spi_transfer_width_t spi_get_frame_size(size_t data_bit_length)
+{
+    if(data_bit_length < 8)
+        return SPI_TRANS_CHAR;
+    else if(data_bit_length < 16)
+        return SPI_TRANS_SHORT;
+    return SPI_TRANS_INT;
+}
+
+static int spi_dma_irq(void *ctx)
+{
+    spi_instance_t *v_instance = (spi_instance_t *)ctx;
+    volatile spi_t *spi_handle = spi[v_instance->spi_num];
+    dmac_irq_unregister(v_instance->dmac_channel);
+    while((spi_handle->sr & 0x05) != 0x04)
+        ;
+    spi_handle->ser = 0x00;
+    spi_handle->ssienr = 0x00;
+    spinlock_unlock(&v_instance->lock);
+    if(v_instance->spi_int_instance.callback)
+    {
+        v_instance->spi_int_instance.callback(v_instance->spi_int_instance.ctx);
+    }
+    return 0;
+}
+
+static int spi_clk_init(uint8_t spi_num)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+    if(spi_num == 3)
+        sysctl_clock_set_clock_select(SYSCTL_CLOCK_SELECT_SPI3, 1);
+    sysctl_clock_enable(SYSCTL_CLOCK_SPI0 + spi_num);
+    sysctl_clock_set_threshold(SYSCTL_THRESHOLD_SPI0 + spi_num, 0);
+    return 0;
+}
+
+static void spi_set_tmod(uint8_t spi_num, uint32_t tmod)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX);
+    volatile spi_t *spi_handle = spi[spi_num];
+    uint8_t tmod_offset = 0;
+    switch(spi_num)
+    {
+        case 0:
+        case 1:
+        case 2:
+            tmod_offset = 8;
+            break;
+        case 3:
+        default:
+            tmod_offset = 10;
+            break;
+    }
+    set_bit(&spi_handle->ctrlr0, 3 << tmod_offset, tmod << tmod_offset);
+}
+
+void spi_init(spi_device_num_t spi_num, spi_work_mode_t work_mode, spi_frame_format_t frame_format,
+              size_t data_bit_length, uint32_t endian)
+{
+    configASSERT(data_bit_length >= 4 && data_bit_length <= 32);
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+    spi_clk_init(spi_num);
+
+    uint8_t dfs_offset, frf_offset, work_mode_offset;
+    switch(spi_num)
+    {
+        case 0:
+        case 1:
+            dfs_offset = 16;
+            frf_offset = 21;
+            work_mode_offset = 6;
+            break;
+        case 2:
+            configASSERT(!"Spi Bus 2 Not Support!");
+            break;
+        case 3:
+        default:
+            dfs_offset = 0;
+            frf_offset = 22;
+            work_mode_offset = 8;
+            break;
+    }
+
+    switch(frame_format)
+    {
+        case SPI_FF_DUAL:
+            configASSERT(data_bit_length % 2 == 0);
+            break;
+        case SPI_FF_QUAD:
+            configASSERT(data_bit_length % 4 == 0);
+            break;
+        case SPI_FF_OCTAL:
+            configASSERT(data_bit_length % 8 == 0);
+            break;
+        default:
+            break;
+    }
+    volatile spi_t *spi_adapter = spi[spi_num];
+    if(spi_adapter->baudr == 0)
+        spi_adapter->baudr = 0x14;
+    spi_adapter->imr = 0x00;
+    spi_adapter->dmacr = 0x00;
+    spi_adapter->dmatdlr = 0x10;
+    spi_adapter->dmardlr = 0x00;
+    spi_adapter->ser = 0x00;
+    spi_adapter->ssienr = 0x00;
+    spi_adapter->ctrlr0 = (work_mode << work_mode_offset) | (frame_format << frf_offset) | ((data_bit_length - 1) << dfs_offset);
+    spi_adapter->spi_ctrlr0 = 0;
+    spi_adapter->endian = endian;
+}
+
+void spi_init_non_standard(spi_device_num_t spi_num, uint32_t instruction_length, uint32_t address_length,
+                           uint32_t wait_cycles, spi_instruction_address_trans_mode_t instruction_address_trans_mode)
+{
+    configASSERT(wait_cycles < (1 << 5));
+    configASSERT(instruction_address_trans_mode < 3);
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+    volatile spi_t *spi_handle = spi[spi_num];
+    uint32_t inst_l = 0;
+    switch(instruction_length)
+    {
+        case 0:
+            inst_l = 0;
+            break;
+        case 4:
+            inst_l = 1;
+            break;
+        case 8:
+            inst_l = 2;
+            break;
+        case 16:
+            inst_l = 3;
+            break;
+        default:
+            configASSERT(!"Invalid instruction length");
+            break;
+    }
+
+    configASSERT(address_length % 4 == 0 && address_length <= 60);
+    uint32_t addr_l = address_length / 4;
+
+    spi_handle->spi_ctrlr0 = (wait_cycles << 11) | (inst_l << 8) | (addr_l << 2) | instruction_address_trans_mode;
+}
+
+uint32_t spi_set_clk_rate(spi_device_num_t spi_num, uint32_t spi_clk)
+{
+    uint32_t spi_baudr = sysctl_clock_get_freq(SYSCTL_CLOCK_SPI0 + spi_num) / spi_clk;
+    if(spi_baudr < 2)
+    {
+        spi_baudr = 2;
+    } else if(spi_baudr > 65534)
+    {
+        spi_baudr = 65534;
+    }
+    volatile spi_t *spi_adapter = spi[spi_num];
+    spi_adapter->baudr = spi_baudr;
+    return sysctl_clock_get_freq(SYSCTL_CLOCK_SPI0 + spi_num) / spi_baudr;
+}
+
+void spi_send_data_normal(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *tx_buff, size_t tx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+
+    size_t index, fifo_len;
+    spi_set_tmod(spi_num, SPI_TMOD_TRANS);
+
+    volatile spi_t *spi_handle = spi[spi_num];
+
+    uint8_t dfs_offset;
+    switch(spi_num)
+    {
+        case 0:
+        case 1:
+            dfs_offset = 16;
+            break;
+        case 2:
+            configASSERT(!"Spi Bus 2 Not Support!");
+            break;
+        case 3:
+        default:
+            dfs_offset = 0;
+            break;
+    }
+    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
+    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
+
+    uint8_t v_misalign_flag = 0;
+    uint32_t v_send_data;
+    if((uintptr_t)tx_buff % frame_width)
+        v_misalign_flag = 1;
+
+    spi_handle->ssienr = 0x01;
+    spi_handle->ser = 1U << chip_select;
+    uint32_t i = 0;
+    while(tx_len)
+    {
+        fifo_len = 32 - spi_handle->txflr;
+        fifo_len = fifo_len < tx_len ? fifo_len : tx_len;
+        switch(frame_width)
+        {
+            case SPI_TRANS_INT:
+                fifo_len = fifo_len / 4 * 4;
+                if(v_misalign_flag)
+                {
+                    for(index = 0; index < fifo_len; index += 4)
+                    {
+                        memcpy(&v_send_data, tx_buff + i, 4);
+                        spi_handle->dr[0] = v_send_data;
+                        i += 4;
+                    }
+                } else
+                {
+                    for(index = 0; index < fifo_len / 4; index++)
+                        spi_handle->dr[0] = ((uint32_t *)tx_buff)[i++];
+                }
+                break;
+            case SPI_TRANS_SHORT:
+                fifo_len = fifo_len / 2 * 2;
+                if(v_misalign_flag)
+                {
+                    for(index = 0; index < fifo_len; index += 2)
+                    {
+                        memcpy(&v_send_data, tx_buff + i, 2);
+                        spi_handle->dr[0] = v_send_data;
+                        i += 2;
+                    }
+                } else
+                {
+                    for(index = 0; index < fifo_len / 2; index++)
+                        spi_handle->dr[0] = ((uint16_t *)tx_buff)[i++];
+                }
+                break;
+            default:
+                for(index = 0; index < fifo_len; index++)
+                    spi_handle->dr[0] = tx_buff[i++];
+                break;
+        }
+        tx_len -= fifo_len;
+    }
+    while((spi_handle->sr & 0x05) != 0x04)
+        ;
+    spi_handle->ser = 0x00;
+    spi_handle->ssienr = 0x00;
+}
+
+void spi_send_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
+                            size_t cmd_len, const uint8_t *tx_buff, size_t tx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+    uint8_t *v_buf = malloc(cmd_len + tx_len);
+    size_t i;
+    for(i = 0; i < cmd_len; i++)
+        v_buf[i] = cmd_buff[i];
+    for(i = 0; i < tx_len; i++)
+        v_buf[cmd_len + i] = tx_buff[i];
+
+    spi_send_data_normal(spi_num, chip_select, v_buf, cmd_len + tx_len);
+    free((void *)v_buf);
+}
+
+void spi_send_data_standard_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
+                                spi_chip_select_t chip_select,
+                                const uint8_t *cmd_buff, size_t cmd_len, const uint8_t *tx_buff, size_t tx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+
+    volatile spi_t *spi_handle = spi[spi_num];
+
+    uint8_t dfs_offset;
+    switch(spi_num)
+    {
+        case 0:
+        case 1:
+            dfs_offset = 16;
+            break;
+        case 2:
+            configASSERT(!"Spi Bus 2 Not Support!");
+            break;
+        case 3:
+        default:
+            dfs_offset = 0;
+            break;
+    }
+    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
+    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
+
+    uint32_t *buf;
+    size_t v_send_len;
+    int i;
+    switch(frame_width)
+    {
+        case SPI_TRANS_INT:
+            buf = malloc(cmd_len + tx_len);
+            for(i = 0; i < cmd_len / 4; i++)
+                buf[i] = ((uint32_t *)cmd_buff)[i];
+            for(i = 0; i < tx_len / 4; i++)
+                buf[cmd_len / 4 + i] = ((uint32_t *)tx_buff)[i];
+            v_send_len = (cmd_len + tx_len) / 4;
+            break;
+        case SPI_TRANS_SHORT:
+            buf = malloc((cmd_len + tx_len) / 2 * sizeof(uint32_t));
+            for(i = 0; i < cmd_len / 2; i++)
+                buf[i] = ((uint16_t *)cmd_buff)[i];
+            for(i = 0; i < tx_len / 2; i++)
+                buf[cmd_len / 2 + i] = ((uint16_t *)tx_buff)[i];
+            v_send_len = (cmd_len + tx_len) / 2;
+            break;
+        default:
+            buf = malloc((cmd_len + tx_len) * sizeof(uint32_t));
+            for(i = 0; i < cmd_len; i++)
+                buf[i] = cmd_buff[i];
+            for(i = 0; i < tx_len; i++)
+                buf[cmd_len + i] = tx_buff[i];
+            v_send_len = cmd_len + tx_len;
+            break;
+    }
+
+    spi_send_data_normal_dma(channel_num, spi_num, chip_select, buf, v_send_len, SPI_TRANS_INT);
+
+    free((void *)buf);
+}
+
+void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
+                              spi_chip_select_t chip_select,
+                              const void *tx_buff, size_t tx_len, spi_transfer_width_t spi_transfer_width)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+    spi_set_tmod(spi_num, SPI_TMOD_TRANS);
+    volatile spi_t *spi_handle = spi[spi_num];
+    uint32_t *buf;
+    int i;
+    switch(spi_transfer_width)
+    {
+        case SPI_TRANS_SHORT:
+#if FIX_CACHE
+            buf = (uint32_t *)iomem_malloc((tx_len) * sizeof(uint32_t));
+#else
+            buf = (uint32_t *)malloc((tx_len) * sizeof(uint32_t));
+#endif
+            for(i = 0; i < tx_len; i++)
+                buf[i] = ((uint16_t *)tx_buff)[i];
+            break;
+        case SPI_TRANS_INT:
+            buf = (uint32_t *)tx_buff;
+            break;
+        case SPI_TRANS_CHAR:
+        default:
+#if FIX_CACHE
+            buf = (uint32_t *)iomem_malloc((tx_len) * sizeof(uint32_t));
+#else
+            buf = (uint32_t *)malloc((tx_len) * sizeof(uint32_t));
+#endif
+
+            for(i = 0; i < tx_len; i++)
+                buf[i] = ((uint8_t *)tx_buff)[i];
+            break;
+    }
+    spi_handle->dmacr = 0x2; /*enable dma transmit*/
+    spi_handle->ssienr = 0x01;
+
+    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
+
+    dmac_set_single_mode(channel_num, buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
+                         DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, tx_len);
+    spi_handle->ser = 1U << chip_select;
+    dmac_wait_done(channel_num);
+    if(spi_transfer_width != SPI_TRANS_INT)
+    {
+#if FIX_CACHE
+        iomem_free((void *)buf);
+#else
+        free((void *)buf);
+#endif
+    }
+    while((spi_handle->sr & 0x05) != 0x04)
+        ;
+    spi_handle->ser = 0x00;
+    spi_handle->ssienr = 0x00;
+}
+
+void spi_dup_send_receive_data_dma(dmac_channel_number_t dma_send_channel_num,
+                                   dmac_channel_number_t dma_receive_channel_num,
+                                   spi_device_num_t spi_num, spi_chip_select_t chip_select,
+                                   const uint8_t *tx_buf, size_t tx_len, uint8_t *rx_buf, size_t rx_len)
+{
+    spi_set_tmod(spi_num, SPI_TMOD_TRANS_RECV);
+    volatile spi_t *spi_handle = spi[spi_num];
+
+    uint8_t dfs_offset;
+    switch(spi_num)
+    {
+        case 0:
+        case 1:
+            dfs_offset = 16;
+            break;
+        case 2:
+            configASSERT(!"Spi Bus 2 Not Support!");
+            break;
+        case 3:
+        default:
+            dfs_offset = 0;
+            break;
+    }
+    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
+    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
+    size_t v_tx_len = tx_len / frame_width;
+    size_t v_rx_len = rx_len / frame_width;
+
+    size_t v_max_len = v_tx_len > v_rx_len ? v_tx_len : v_rx_len;
+
+#if FIX_CACHE
+    uint32_t *v_tx_buf = iomem_malloc(v_max_len * 4);
+    uint32_t *v_rx_buf = iomem_malloc(v_max_len * 4);
+#else
+    uint32_t *v_tx_buf = malloc(v_max_len * 4);
+    uint32_t *v_rx_buf = malloc(v_max_len * 4);
+#endif
+    uint32_t i = 0;
+    switch(frame_width)
+    {
+        case SPI_TRANS_INT:
+            for(i = 0; i < v_tx_len; i++)
+            {
+                v_tx_buf[i] = ((uint32_t *)tx_buf)[i];
+            }
+            if(v_max_len > v_tx_len)
+            {
+                while(i < v_max_len)
+                {
+                    v_tx_buf[i++] = 0xFFFFFFFF;
+                }
+            }
+            break;
+        case SPI_TRANS_SHORT:
+            for(i = 0; i < v_tx_len; i++)
+            {
+                v_tx_buf[i] = ((uint16_t *)tx_buf)[i];
+            }
+            if(v_max_len > v_tx_len)
+            {
+                while(i < v_max_len)
+                {
+                    v_tx_buf[i++] = 0xFFFFFFFF;
+                }
+            }
+            break;
+        default:
+            for(i = 0; i < v_tx_len; i++)
+            {
+                v_tx_buf[i] = tx_buf[i];
+            }
+            if(v_max_len > v_tx_len)
+            {
+                while(i < v_max_len)
+                {
+                    v_tx_buf[i++] = 0xFFFFFFFF;
+                }
+            }
+            break;
+    }
+
+    spi_handle->dmacr = 0x3;
+    spi_handle->ssienr = 0x01;
+
+    sysctl_dma_select((sysctl_dma_channel_t)dma_send_channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
+    sysctl_dma_select((sysctl_dma_channel_t)dma_receive_channel_num, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
+
+    dmac_set_single_mode(dma_receive_channel_num, (void *)(&spi_handle->dr[0]), v_rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, v_max_len);
+
+    dmac_set_single_mode(dma_send_channel_num, v_tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
+                         DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, v_max_len);
+
+    spi_handle->ser = 1U << chip_select;
+    dmac_wait_done(dma_send_channel_num);
+    dmac_wait_done(dma_receive_channel_num);
+
+    spi_handle->ser = 0x00;
+    spi_handle->ssienr = 0x00;
+
+    switch(frame_width)
+    {
+        case SPI_TRANS_INT:
+            for(i = 0; i < v_rx_len; i++)
+                ((uint32_t *)rx_buf)[i] = v_rx_buf[i];
+            break;
+        case SPI_TRANS_SHORT:
+            for(i = 0; i < v_rx_len; i++)
+                ((uint16_t *)rx_buf)[i] = v_rx_buf[i];
+            break;
+        default:
+            for(i = 0; i < v_rx_len; i++)
+                rx_buf[i] = v_rx_buf[i];
+            break;
+    }
+#if FIX_CACHE
+    iomem_free(v_tx_buf);
+    iomem_free(v_rx_buf);
+#else
+    free(v_tx_buf);
+    free(v_rx_buf);
+#endif
+}
+
+/*
+ * Polled (non-DMA) read: push an optional command into the TX FIFO, then drain
+ * the RX FIFO into rx_buff.
+ *
+ * spi_num      SPI device to use; device 2 is rejected by the assertion.
+ * chip_select  Chip-select index, written to the `ser` register as 1 << chip_select.
+ * cmd_buff     Command bytes, reinterpreted as 8/16/32-bit frames according to
+ *              the frame size currently programmed in ctrlr0.
+ * cmd_len      Command length in bytes; 0 selects SPI_TMOD_RECV and triggers a
+ *              single dummy write instead of a command.
+ * rx_buff      Destination for the received frames.
+ * rx_len       Receive length in bytes.
+ */
+void spi_receive_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
+                               size_t cmd_len, uint8_t *rx_buff, size_t rx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+
+    if(cmd_len != 0)
+        spi_set_tmod(spi_num, SPI_TMOD_EEROM);
+    else
+        spi_set_tmod(spi_num, SPI_TMOD_RECV);
+
+    volatile spi_t *regs = spi[spi_num];
+
+    /* The frame-size field is read at bit 16 of ctrlr0 on devices 0/1 and at bit 0 otherwise. */
+    uint8_t dfs_offset;
+    if(spi_num == 0 || spi_num == 1)
+    {
+        dfs_offset = 16;
+    } else if(spi_num == 2)
+    {
+        configASSERT(!"Spi Bus 2 Not Support!");
+    } else
+    {
+        dfs_offset = 0;
+    }
+    uint32_t frame_bits = (regs->ctrlr0 >> dfs_offset) & 0x1F;
+    spi_transfer_width_t frame_width = spi_get_frame_size(frame_bits);
+
+    /* Byte lengths converted to frame counts. */
+    size_t cmd_frames = cmd_len / frame_width;
+    uint32_t rx_frames = rx_len / frame_width;
+    uint32_t pos = 0;
+    size_t burst, n;
+
+    regs->ctrlr1 = (uint32_t)(rx_frames - 1);
+    regs->ssienr = 0x01;
+
+    /* Write the command in bursts of at most (32 - TX FIFO level) frames. */
+    while(cmd_frames != 0)
+    {
+        burst = 32 - regs->txflr;
+        if(burst > cmd_frames)
+            burst = cmd_frames;
+
+        if(frame_width == SPI_TRANS_INT)
+        {
+            for(n = burst; n != 0; n--)
+                regs->dr[0] = ((uint32_t *)cmd_buff)[pos++];
+        } else if(frame_width == SPI_TRANS_SHORT)
+        {
+            for(n = burst; n != 0; n--)
+                regs->dr[0] = ((uint16_t *)cmd_buff)[pos++];
+        } else
+        {
+            for(n = burst; n != 0; n--)
+                regs->dr[0] = cmd_buff[pos++];
+        }
+
+        /* Chip select is (re)asserted after every burst. */
+        regs->ser = 1U << chip_select;
+        cmd_frames -= burst;
+    }
+
+    /* Without a command, one dummy word is written before selecting the chip. */
+    if(cmd_len == 0)
+    {
+        regs->dr[0] = 0xffffffff;
+        regs->ser = 1U << chip_select;
+    }
+
+    /* Drain whatever the RX FIFO currently holds until every frame has arrived. */
+    pos = 0;
+    while(rx_frames != 0)
+    {
+        burst = regs->rxflr;
+        if(burst > rx_frames)
+            burst = rx_frames;
+
+        if(frame_width == SPI_TRANS_INT)
+        {
+            for(n = burst; n != 0; n--)
+                ((uint32_t *)rx_buff)[pos++] = regs->dr[0];
+        } else if(frame_width == SPI_TRANS_SHORT)
+        {
+            for(n = burst; n != 0; n--)
+                ((uint16_t *)rx_buff)[pos++] = (uint16_t)regs->dr[0];
+        } else
+        {
+            for(n = burst; n != 0; n--)
+                rx_buff[pos++] = (uint8_t)regs->dr[0];
+        }
+
+        rx_frames -= burst;
+    }
+
+    regs->ser = 0x00;
+    regs->ssienr = 0x00;
+}
+
+/*
+ * DMA read: optionally send a command through dma_send_channel_num, then
+ * receive through dma_receive_channel_num, blocking until both are done.
+ *
+ * cmd_buff / rx_buff are transferred as 32-bit DMA elements
+ * (DMAC_TRANS_WIDTH_32); cmd_len and rx_len are passed to the DMA controller
+ * as element counts and rx_len - 1 is also written to ctrlr1.
+ * A cmd_len of 0 selects SPI_TMOD_RECV and skips every step that involves the
+ * send channel.
+ */
+void spi_receive_data_normal_dma(dmac_channel_number_t dma_send_channel_num,
+                                 dmac_channel_number_t dma_receive_channel_num,
+                                 spi_device_num_t spi_num, spi_chip_select_t chip_select, const void *cmd_buff,
+                                 size_t cmd_len, void *rx_buff, size_t rx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+
+    const int has_cmd = (cmd_len != 0);
+
+    if(has_cmd)
+        spi_set_tmod(spi_num, SPI_TMOD_EEROM);
+    else
+        spi_set_tmod(spi_num, SPI_TMOD_RECV);
+
+    volatile spi_t *regs = spi[spi_num];
+    /* Address of the data register, used as the fixed end of both DMA transfers. */
+    void *data_reg = (void *)(&regs->dr[0]);
+
+    regs->ctrlr1 = (uint32_t)(rx_len - 1);
+    regs->dmacr = 0x3;
+    regs->ssienr = 0x01;
+
+    /* Route the SPI request lines to the chosen DMA channels. */
+    if(has_cmd)
+    {
+        sysctl_dma_select((sysctl_dma_channel_t)dma_send_channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
+    }
+    sysctl_dma_select((sysctl_dma_channel_t)dma_receive_channel_num, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
+
+    /* The receive channel is armed before the send channel. */
+    dmac_set_single_mode(dma_receive_channel_num, data_reg, rx_buff, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, rx_len);
+    if(has_cmd)
+    {
+        dmac_set_single_mode(dma_send_channel_num, cmd_buff, data_reg, DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
+                             DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, cmd_len);
+    }
+
+    /* No command in standard frame format: write one dummy word to the data register. */
+    if(!has_cmd && spi_get_frame_format(spi_num) == SPI_FF_STANDARD)
+    {
+        spi[spi_num]->dr[0] = 0xffffffff;
+    }
+
+    regs->ser = 1U << chip_select;
+
+    if(has_cmd)
+    {
+        dmac_wait_done(dma_send_channel_num);
+    }
+    dmac_wait_done(dma_receive_channel_num);
+
+    regs->ser = 0x00;
+    regs->ssienr = 0x00;
+}
+
+/*
+ * DMA read for 8/16/32-bit frames using a temporary bounce buffer.
+ *
+ * The command is widened to one 32-bit word per frame at the start of the
+ * bounce buffer, the received frames land directly behind it, and after
+ * spi_receive_data_normal_dma() returns they are narrowed back into rx_buff.
+ *
+ * cmd_len and rx_len are byte counts; the frame width is taken from the frame
+ * size currently programmed in ctrlr0.
+ */
+void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
+                                   dmac_channel_number_t dma_receive_channel_num,
+                                   spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
+                                   size_t cmd_len, uint8_t *rx_buff, size_t rx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+    volatile spi_t *regs = spi[spi_num];
+
+    /* The frame-size field is read at bit 16 of ctrlr0 on devices 0/1 and at bit 0 otherwise. */
+    uint8_t dfs_offset;
+    if(spi_num == 0 || spi_num == 1)
+    {
+        dfs_offset = 16;
+    } else if(spi_num == 2)
+    {
+        configASSERT(!"Spi Bus 2 Not Support!");
+    } else
+    {
+        dfs_offset = 0;
+    }
+    uint32_t frame_bits = (regs->ctrlr0 >> dfs_offset) & 0x1F;
+    spi_transfer_width_t frame_width = spi_get_frame_size(frame_bits);
+
+    /* Frame counts and bounce-buffer size for the active frame width. */
+    size_t v_cmd_len;
+    size_t v_recv_len;
+    size_t bounce_bytes;
+    if(frame_width == SPI_TRANS_INT)
+    {
+        v_cmd_len = cmd_len / 4;
+        v_recv_len = rx_len / 4;
+        bounce_bytes = cmd_len + rx_len;
+    } else if(frame_width == SPI_TRANS_SHORT)
+    {
+        v_cmd_len = cmd_len / 2;
+        v_recv_len = rx_len / 2;
+        bounce_bytes = (cmd_len + rx_len) / 2 * sizeof(uint32_t);
+    } else
+    {
+        v_cmd_len = cmd_len;
+        v_recv_len = rx_len;
+        bounce_bytes = (cmd_len + rx_len) * sizeof(uint32_t);
+    }
+
+#if FIX_CACHE
+    uint32_t *write_cmd = iomem_malloc(bounce_bytes);
+#else
+    uint32_t *write_cmd = malloc(bounce_bytes);
+#endif
+
+    /* Widen every command frame to a 32-bit word. */
+    size_t k;
+    if(frame_width == SPI_TRANS_INT)
+    {
+        for(k = 0; k < v_cmd_len; k++)
+            write_cmd[k] = ((uint32_t *)cmd_buff)[k];
+    } else if(frame_width == SPI_TRANS_SHORT)
+    {
+        for(k = 0; k < v_cmd_len; k++)
+            write_cmd[k] = ((uint16_t *)cmd_buff)[k];
+    } else
+    {
+        for(k = 0; k < v_cmd_len; k++)
+            write_cmd[k] = cmd_buff[k];
+    }
+
+    /* Received words are stored right after the command words. */
+    uint32_t *read_buf = &write_cmd[v_cmd_len];
+
+    spi_receive_data_normal_dma(dma_send_channel_num, dma_receive_channel_num, spi_num, chip_select, write_cmd, v_cmd_len, read_buf, v_recv_len);
+
+    /* Narrow the received words back to the caller's frame width. */
+    if(frame_width == SPI_TRANS_INT)
+    {
+        for(k = 0; k < v_recv_len; k++)
+            ((uint32_t *)rx_buff)[k] = read_buf[k];
+    } else if(frame_width == SPI_TRANS_SHORT)
+    {
+        for(k = 0; k < v_recv_len; k++)
+            ((uint16_t *)rx_buff)[k] = read_buf[k];
+    } else
+    {
+        for(k = 0; k < v_recv_len; k++)
+            rx_buff[k] = read_buf[k];
+    }
+
+#if FIX_CACHE
+    iomem_free(write_cmd);
+#else
+    free(write_cmd);
+#endif
+}
+
+/*
+ * Polled (non-DMA) read where the command is supplied as 32-bit words.
+ *
+ * cmd_buff  Command words; each word is written to the data register as-is.
+ * cmd_len   Number of command words; 0 selects SPI_TMOD_RECV and only asserts
+ *           chip select.
+ * rx_buff   Destination for the received frames.
+ * rx_len    Receive length in bytes; divided by the frame width read back from
+ *           ctrlr0 to obtain the frame count.
+ */
+void spi_receive_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
+                               size_t cmd_len, uint8_t *rx_buff, size_t rx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+
+    if(cmd_len != 0)
+        spi_set_tmod(spi_num, SPI_TMOD_EEROM);
+    else
+        spi_set_tmod(spi_num, SPI_TMOD_RECV);
+
+    volatile spi_t *regs = spi[spi_num];
+
+    /* The frame-size field is read at bit 16 of ctrlr0 on devices 0/1 and at bit 0 otherwise. */
+    uint8_t dfs_offset;
+    if(spi_num == 0 || spi_num == 1)
+    {
+        dfs_offset = 16;
+    } else if(spi_num == 2)
+    {
+        configASSERT(!"Spi Bus 2 Not Support!");
+    } else
+    {
+        dfs_offset = 0;
+    }
+    uint32_t frame_bits = (regs->ctrlr0 >> dfs_offset) & 0x1F;
+    spi_transfer_width_t frame_width = spi_get_frame_size(frame_bits);
+
+    uint32_t cmd_words = cmd_len;
+    uint32_t rx_frames = rx_len / frame_width;
+    uint32_t pos = 0;
+    size_t burst, n;
+
+    regs->ctrlr1 = (uint32_t)(rx_frames - 1);
+    regs->ssienr = 0x01;
+
+    /* Write the command in bursts of at most (32 - TX FIFO level) words. */
+    while(cmd_words != 0)
+    {
+        burst = 32 - regs->txflr;
+        if(burst > cmd_words)
+            burst = cmd_words;
+
+        for(n = burst; n != 0; n--)
+            regs->dr[0] = *cmd_buff++;
+
+        regs->ser = 1U << chip_select;
+        cmd_words -= burst;
+    }
+
+    /* With no command the loop above never ran, so select the chip here. */
+    if(cmd_len == 0)
+    {
+        regs->ser = 1U << chip_select;
+    }
+
+    /* Drain whatever the RX FIFO currently holds until every frame has arrived. */
+    while(rx_frames != 0)
+    {
+        burst = regs->rxflr;
+        if(burst > rx_frames)
+            burst = rx_frames;
+
+        if(frame_width == SPI_TRANS_INT)
+        {
+            for(n = burst; n != 0; n--)
+                ((uint32_t *)rx_buff)[pos++] = regs->dr[0];
+        } else if(frame_width == SPI_TRANS_SHORT)
+        {
+            for(n = burst; n != 0; n--)
+                ((uint16_t *)rx_buff)[pos++] = (uint16_t)regs->dr[0];
+        } else
+        {
+            for(n = burst; n != 0; n--)
+                rx_buff[pos++] = (uint8_t)regs->dr[0];
+        }
+
+        rx_frames -= burst;
+    }
+
+    regs->ser = 0x00;
+    regs->ssienr = 0x00;
+}
+
+/*
+ * DMA read where the command is supplied as 32-bit words.
+ *
+ * For 32-bit frames the caller's buffers are handed to
+ * spi_receive_data_normal_dma() directly. For narrower frames a bounce buffer
+ * holds the command words followed by one 32-bit word per received frame, and
+ * the received words are narrowed into rx_buff afterwards.
+ *
+ * cmd_buff  Command words.
+ * cmd_len   Number of command WORDS (not bytes).
+ * rx_buff   Destination for the received frames.
+ * rx_len    Receive length in bytes.
+ */
+void spi_receive_data_multiple_dma(dmac_channel_number_t dma_send_channel_num,
+                                   dmac_channel_number_t dma_receive_channel_num,
+                                   spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
+                                   size_t cmd_len, uint8_t *rx_buff, size_t rx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+
+    volatile spi_t *spi_handle = spi[spi_num];
+
+    /* The frame-size field is read at bit 16 of ctrlr0 on devices 0/1 and at bit 0 otherwise. */
+    uint8_t dfs_offset;
+    switch(spi_num)
+    {
+        case 0:
+        case 1:
+            dfs_offset = 16;
+            break;
+        case 2:
+            configASSERT(!"Spi Bus 2 Not Support!");
+            break;
+        case 3:
+        default:
+            dfs_offset = 0;
+            break;
+    }
+    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
+    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
+
+    size_t i;
+
+    uint32_t *write_cmd = NULL;
+    uint32_t *read_buf = NULL;
+    size_t v_recv_len;
+    switch(frame_width)
+    {
+        case SPI_TRANS_INT:
+            v_recv_len = rx_len / 4;
+            break;
+        case SPI_TRANS_SHORT:
+            v_recv_len = rx_len / 2;
+            break;
+        default:
+            v_recv_len = rx_len;
+            break;
+    }
+
+    if(frame_width != SPI_TRANS_INT)
+    {
+        /*
+         * The bounce buffer stores cmd_len command words plus v_recv_len
+         * received words, so BOTH counts must be scaled by sizeof(uint32_t).
+         * Sizing the command part as cmd_len bytes would let the copy below
+         * and the DMA write past the end of the allocation.
+         */
+#if FIX_CACHE
+        write_cmd = iomem_malloc((cmd_len + v_recv_len) * sizeof(uint32_t));
+#else
+        write_cmd = malloc((cmd_len + v_recv_len) * sizeof(uint32_t));
+#endif
+        for(i = 0; i < cmd_len; i++)
+            write_cmd[i] = cmd_buff[i];
+        /* Received words are stored right after the command words. */
+        read_buf = &write_cmd[cmd_len];
+    }
+
+    if(frame_width == SPI_TRANS_INT)
+        spi_receive_data_normal_dma(dma_send_channel_num, dma_receive_channel_num, spi_num, chip_select, cmd_buff, cmd_len, rx_buff, v_recv_len);
+    else
+        spi_receive_data_normal_dma(dma_send_channel_num, dma_receive_channel_num, spi_num, chip_select, write_cmd, cmd_len, read_buf, v_recv_len);
+
+    /* Narrow the received words back to the caller's frame width. */
+    switch(frame_width)
+    {
+        case SPI_TRANS_INT:
+            break;
+        case SPI_TRANS_SHORT:
+            for(i = 0; i < v_recv_len; i++)
+                ((uint16_t *)rx_buff)[i] = read_buf[i];
+            break;
+        default:
+            for(i = 0; i < v_recv_len; i++)
+                rx_buff[i] = read_buf[i];
+            break;
+    }
+
+    if(frame_width != SPI_TRANS_INT)
+    {
+#if FIX_CACHE
+        iomem_free(write_cmd);
+#else
+        free(write_cmd);
+#endif
+    }
+}
+
+/*
+ * Polled write: push cmd_len command words into the TX FIFO, then hand the
+ * payload to spi_send_data_normal().
+ *
+ * cmd_buff  Command words, written to the data register one word at a time.
+ * cmd_len   Number of command words.
+ * tx_buff   Payload forwarded unchanged to spi_send_data_normal().
+ * tx_len    Payload length forwarded unchanged to spi_send_data_normal().
+ */
+void spi_send_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
+                            size_t cmd_len, const uint8_t *tx_buff, size_t tx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+
+    spi_set_tmod(spi_num, SPI_TMOD_TRANS);
+
+    volatile spi_t *regs = spi[spi_num];
+    regs->ssienr = 0x01;
+    regs->ser = 1U << chip_select;
+
+    /* The remaining command is tracked in bytes (four per word). */
+    size_t bytes_left = cmd_len * 4;
+    size_t room, words, n;
+    while(bytes_left != 0)
+    {
+        room = 32 - regs->txflr;
+        if(room > bytes_left)
+            room = bytes_left;
+
+        /* Only whole words are written; a remainder waits for the next pass. */
+        words = room / 4;
+        for(n = words; n != 0; n--)
+            regs->dr[0] = *cmd_buff++;
+
+        bytes_left -= words * 4;
+    }
+
+    spi_send_data_normal(spi_num, chip_select, tx_buff, tx_len);
+}
+
+/*
+ * DMA write of a word-sized command followed by a payload.
+ *
+ * Command words and payload frames are packed into one temporary buffer with
+ * one 32-bit word per element, which is then sent with
+ * spi_send_data_normal_dma() as SPI_TRANS_INT data.
+ *
+ * cmd_buff  Command words.
+ * cmd_len   Number of command words.
+ * tx_buff   Payload bytes, reinterpreted as 8/16/32-bit frames according to
+ *           the frame size currently programmed in ctrlr0.
+ * tx_len    Payload length in bytes.
+ */
+void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num,
+                                spi_chip_select_t chip_select,
+                                const uint32_t *cmd_buff, size_t cmd_len, const uint8_t *tx_buff, size_t tx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+    volatile spi_t *spi_handle = spi[spi_num];
+
+    /* The frame-size field is read at bit 16 of ctrlr0 on devices 0/1 and at bit 0 otherwise. */
+    uint8_t dfs_offset;
+    switch(spi_num)
+    {
+        case 0:
+        case 1:
+            dfs_offset = 16;
+            break;
+        case 2:
+            configASSERT(!"Spi Bus 2 Not Support!");
+            break;
+        case 3:
+        default:
+            dfs_offset = 0;
+            break;
+    }
+    uint32_t data_bit_length = (spi_handle->ctrlr0 >> dfs_offset) & 0x1F;
+    spi_transfer_width_t frame_width = spi_get_frame_size(data_bit_length);
+
+    /* Number of payload frames for the active frame width. */
+    size_t tx_frames;
+    switch(frame_width)
+    {
+        case SPI_TRANS_INT:
+            tx_frames = tx_len / 4;
+            break;
+        case SPI_TRANS_SHORT:
+            tx_frames = tx_len / 2;
+            break;
+        default:
+            tx_frames = tx_len;
+            break;
+    }
+
+    /* One 32-bit word per command word and per payload frame. */
+    size_t v_send_len = cmd_len + tx_frames;
+#if FIX_CACHE
+    uint32_t *buf = iomem_malloc(v_send_len * sizeof(uint32_t));
+#else
+    uint32_t *buf = malloc(v_send_len * sizeof(uint32_t));
+#endif
+
+    /* Indices are size_t so they match the size_t lengths they are compared with. */
+    size_t i;
+    for(i = 0; i < cmd_len; i++)
+        buf[i] = cmd_buff[i];
+
+    uint32_t *payload = &buf[cmd_len];
+    switch(frame_width)
+    {
+        case SPI_TRANS_INT:
+            for(i = 0; i < tx_frames; i++)
+                payload[i] = ((const uint32_t *)tx_buff)[i];
+            break;
+        case SPI_TRANS_SHORT:
+            for(i = 0; i < tx_frames; i++)
+                payload[i] = ((const uint16_t *)tx_buff)[i];
+            break;
+        default:
+            for(i = 0; i < tx_frames; i++)
+                payload[i] = tx_buff[i];
+            break;
+    }
+
+    spi_send_data_normal_dma(channel_num, spi_num, chip_select, buf, v_send_len, SPI_TRANS_INT);
+
+#if FIX_CACHE
+    iomem_free((void *)buf);
+#else
+    free((void *)buf);
+#endif
+}
+
+/*
+ * DMA fill: send tx_len 32-bit elements while the DMA source address stays
+ * fixed at tx_buff (both addresses use DMAC_ADDR_NOCHANGE), so the same word
+ * is transmitted repeatedly. Blocks until the DMA channel is done and the
+ * status register reads the expected value.
+ */
+void spi_fill_data_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num, spi_chip_select_t chip_select,
+                       const uint32_t *tx_buff, size_t tx_len)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+
+    spi_set_tmod(spi_num, SPI_TMOD_TRANS);
+
+    volatile spi_t *regs = spi[spi_num];
+    void *data_reg = (void *)(&regs->dr[0]);
+
+    regs->dmacr = 0x2; /* enable DMA transmit */
+    regs->ssienr = 0x01;
+
+    sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
+    dmac_set_single_mode(channel_num, tx_buff, data_reg, DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
+                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, tx_len);
+
+    regs->ser = 1U << chip_select;
+    dmac_wait_done(channel_num);
+
+    /* Spin until status bits 0 and 2 read 0 and 1 respectively. */
+    for(;;)
+    {
+        if((regs->sr & 0x05) == 0x04)
+            break;
+    }
+
+    regs->ser = 0x00;
+    regs->ssienr = 0x00;
+}
+
+/*
+ * Return the slave (SPI device 2) to the IDLE state: restore the MOSI pin
+ * function in dual mode, reprogram ctrlr0 with the configured frame length,
+ * switch DMA requests off and drive the ready pin high.
+ */
+static void spi_slave_idle_mode(void)
+{
+    volatile spi_t *regs = spi[2];
+
+    if(g_instance.is_dual)
+    {
+        fpioa_set_function(g_instance.mosi_pin, FUNC_SPI_SLAVE_D0);
+    }
+
+    g_instance.status = IDLE;
+
+    /* ctrlr0 and dmacr are rewritten between ssienr = 0 and ssienr = 1. */
+    regs->ssienr = 0x00;
+    regs->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs);
+    regs->dmacr = 0x00;
+    regs->ssienr = 0x01;
+
+    gpiohs_set_pin(g_instance.ready_pin, GPIO_PV_HIGH);
+}
+
+/*
+ * Decode the 8-byte command packet waiting in the slave RX FIFO and prepare
+ * the matching transfer.
+ *
+ * Packet layout as decoded below:
+ *   byte 0     command code
+ *   bytes 1-4  address, least significant byte first
+ *   bytes 5-6  length, least significant byte first (0 is treated as 65536)
+ *   byte 7     8-bit sum of bytes 0-6
+ *
+ * A bad checksum, an over-long byte-command, a config access outside the
+ * registered config buffer or an unknown command code returns the slave to
+ * idle. Otherwise the state becomes TRANSFER, the controller (and for block
+ * commands the DMA channel) is set up, and the ready pin is driven low.
+ */
+static void spi_slave_command_mode(void)
+{
+    volatile spi_t *spi_handle = spi[2];
+    /*
+     * The packet is popped from the FIFO as 8-, 16- or 32-bit frames. A union
+     * gives the wider views correct alignment and a well-defined way to read
+     * the same storage as bytes afterwards.
+     */
+    union
+    {
+        uint8_t bytes[8];
+        uint16_t halves[8 / 2];
+        uint32_t words[8 / 4];
+    } packet;
+    const uint8_t *cmd_data = packet.bytes;
+    uint8_t sum = 0;
+
+    spi_transfer_width_t frame_width = spi_get_frame_size(g_instance.data_bit_length - 1);
+    spi_device_num_t spi_num = SPI_DEVICE_2;
+    switch(frame_width)
+    {
+        case SPI_TRANS_INT:
+            for(uint32_t i = 0; i < 8 / 4; i++)
+                packet.words[i] = spi_handle->dr[0];
+            break;
+        case SPI_TRANS_SHORT:
+            for(uint32_t i = 0; i < 8 / 2; i++)
+                packet.halves[i] = spi_handle->dr[0];
+            break;
+        default:
+            for(uint32_t i = 0; i < 8; i++)
+                packet.bytes[i] = spi_handle->dr[0];
+            break;
+    }
+
+    /* Verify the checksum in the last byte. */
+    for(uint32_t i = 0; i < 7; i++)
+    {
+        sum += cmd_data[i];
+    }
+    if(cmd_data[7] != sum)
+    {
+        spi_slave_idle_mode();
+        return;
+    }
+    g_instance.command.cmd = cmd_data[0];
+    /* Widen before shifting: shifting a promoted int into bit 31 is undefined. */
+    g_instance.command.addr = (uint32_t)cmd_data[1] | ((uint32_t)cmd_data[2] << 8) | ((uint32_t)cmd_data[3] << 16) | ((uint32_t)cmd_data[4] << 24);
+
+    g_instance.command.len = (uint32_t)cmd_data[5] | ((uint32_t)cmd_data[6] << 8);
+    if(g_instance.command.len == 0)
+        g_instance.command.len = 65536;
+    /* Commands below WRITE_DATA_BLOCK may move at most 8 bytes. */
+    if((g_instance.command.cmd < WRITE_DATA_BLOCK) && (g_instance.command.len > 8))
+    {
+        spi_slave_idle_mode();
+        return;
+    }
+    /*
+     * addr and len come from the bus master. Config accesses index
+     * config_ptr, so reject any range that leaves the config_len bytes
+     * registered through spi_slave_config().
+     */
+    if((g_instance.command.cmd == WRITE_CONFIG || g_instance.command.cmd == READ_CONFIG) &&
+       ((uint64_t)g_instance.command.addr + (uint64_t)g_instance.command.len > (uint64_t)g_instance.config_len))
+    {
+        spi_slave_idle_mode();
+        return;
+    }
+    g_instance.status = TRANSFER;
+    spi_handle->ssienr = 0x00;
+    if(g_instance.command.cmd == WRITE_CONFIG)
+    {
+        /* Data is collected later by spi_slave_transfer_mode(). */
+        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs);
+        spi_handle->ssienr = 0x01;
+    } else if(g_instance.command.cmd == READ_CONFIG)
+    {
+        if(g_instance.is_dual)
+        {
+            fpioa_set_function(g_instance.miso_pin, FUNC_SPI_SLAVE_D0);
+        }
+        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x0 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs);
+        spi_set_tmod(2, SPI_TMOD_TRANS);
+        spi_handle->ssienr = 0x01;
+        /* Preload the TX FIFO with the requested config bytes. */
+        switch(frame_width)
+        {
+            case SPI_TRANS_INT:
+                for(uint32_t i = 0; i < g_instance.command.len / 4; i++)
+                {
+                    spi_handle->dr[0] = ((uint32_t *)&g_instance.config_ptr[g_instance.command.addr])[i];
+                }
+                break;
+            case SPI_TRANS_SHORT:
+                for(uint32_t i = 0; i < g_instance.command.len / 2; i++)
+                {
+                    spi_handle->dr[0] = ((uint16_t *)&g_instance.config_ptr[g_instance.command.addr])[i];
+                }
+                break;
+            default:
+                for(uint32_t i = 0; i < g_instance.command.len; i++)
+                {
+                    spi_handle->dr[0] = ((uint8_t *)&g_instance.config_ptr[g_instance.command.addr])[i];
+                }
+                break;
+        }
+    } else if(g_instance.command.cmd == WRITE_DATA_BYTE)
+    {
+        /* Data is collected later by spi_slave_transfer_mode(). */
+        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs);
+        spi_handle->ssienr = 0x01;
+    } else if(g_instance.command.cmd == READ_DATA_BYTE)
+    {
+        if(g_instance.is_dual)
+        {
+            fpioa_set_function(g_instance.miso_pin, FUNC_SPI_SLAVE_D0);
+        }
+        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x0 << g_instance.slv_oe) | ((g_instance.data_bit_length - 1) << g_instance.dfs);
+        spi_set_tmod(2, SPI_TMOD_TRANS);
+        spi_handle->ssienr = 0x01;
+        /* Preload the TX FIFO from the memory address named in the command. */
+        switch(frame_width)
+        {
+            case SPI_TRANS_INT:
+                for(uint32_t i = 0; i < g_instance.command.len / 4; i++)
+                {
+                    spi_handle->dr[0] = ((uint32_t *)(uintptr_t)g_instance.command.addr)[i];
+                }
+                break;
+            case SPI_TRANS_SHORT:
+                for(uint32_t i = 0; i < g_instance.command.len / 2; i++)
+                {
+                    spi_handle->dr[0] = ((uint16_t *)(uintptr_t)g_instance.command.addr)[i];
+                }
+                break;
+            default:
+                for(uint32_t i = 0; i < g_instance.command.len; i++)
+                {
+                    spi_handle->dr[0] = ((uint8_t *)(uintptr_t)g_instance.command.addr)[i];
+                }
+                break;
+        }
+    } else if(g_instance.command.cmd == WRITE_DATA_BLOCK)
+    {
+        /* Block transfers use 32-bit frames; the address is masked down to a 16-byte boundary. */
+        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((32 - 1) << g_instance.dfs);
+
+        spi_handle->dmacr = 0x01;
+        spi_handle->ssienr = 0x01;
+
+        sysctl_dma_select(g_instance.dmac_channel, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
+
+        dmac_set_single_mode(g_instance.dmac_channel, (void *)(&spi_handle->dr[0]), (void *)((uintptr_t)g_instance.command.addr & 0xFFFFFFF0), DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+                             DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, g_instance.command.len * 4);
+    } else if(g_instance.command.cmd == READ_DATA_BLOCK)
+    {
+        if(g_instance.is_dual)
+        {
+            fpioa_set_function(g_instance.miso_pin, FUNC_SPI_SLAVE_D0);
+        }
+        /* Block transfers use 32-bit frames; the address is masked down to a 16-byte boundary. */
+        spi_handle->ctrlr0 = (0x0 << g_instance.work_mode) | (0x0 << g_instance.slv_oe) | ((32 - 1) << g_instance.dfs);
+        spi_set_tmod(2, SPI_TMOD_TRANS);
+        spi_handle->dmacr = 0x02;
+        spi_handle->ssienr = 0x01;
+
+        sysctl_dma_select(g_instance.dmac_channel, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
+        dmac_set_single_mode(g_instance.dmac_channel, (void *)((uintptr_t)g_instance.command.addr & 0xFFFFFFF0), (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
+                             DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, g_instance.command.len * 4);
+    } else
+    {
+        spi_slave_idle_mode();
+        return;
+    }
+    /* Drive the ready pin low, then wait on the DMA channel (done for every accepted command). */
+    gpiohs_set_pin(g_instance.ready_pin, GPIO_PV_LOW);
+    dmac_wait_done(g_instance.dmac_channel);
+}
+
+/*
+ * Finish a byte-sized slave transfer (WRITE_CONFIG, WRITE_DATA_BYTE,
+ * READ_CONFIG or READ_DATA_BYTE).
+ *
+ * For writes the received frames are moved from the RX FIFO to their
+ * destination unless too few frames arrived (command.err = 1). For reads the
+ * TX FIFO must be empty, otherwise command.err = 2. The user callback is then
+ * invoked with the command and the slave returns to idle. Any other command
+ * code goes straight back to idle without invoking the callback.
+ */
+static void spi_slave_transfer_mode(void)
+{
+    spi_transfer_width_t frame_width = spi_get_frame_size(g_instance.data_bit_length - 1);
+
+    /* Command length converted from bytes to frames. */
+    uint32_t frames = 0;
+    if(frame_width == SPI_TRANS_INT)
+        frames = g_instance.command.len / 4;
+    else if(frame_width == SPI_TRANS_SHORT)
+        frames = g_instance.command.len / 2;
+    else
+        frames = g_instance.command.len;
+
+    volatile spi_t *regs = spi[2];
+    g_instance.command.err = 0;
+
+    const int is_write = (g_instance.command.cmd == WRITE_CONFIG || g_instance.command.cmd == WRITE_DATA_BYTE);
+    const int is_read = !is_write && (g_instance.command.cmd == READ_CONFIG || g_instance.command.cmd == READ_DATA_BYTE);
+
+    if(!is_write && !is_read)
+    {
+        spi_slave_idle_mode();
+        return;
+    }
+
+    if(is_write)
+    {
+        if(regs->rxflr < frames - 1)
+            g_instance.command.err = 1;
+    } else
+    {
+        if(regs->txflr != 0)
+            g_instance.command.err = 2;
+    }
+
+    if(is_write && g_instance.command.err == 0)
+    {
+        /* Config writes land inside config_ptr, data writes at the raw address. */
+        void *dest;
+        if(g_instance.command.cmd == WRITE_CONFIG)
+            dest = (void *)&g_instance.config_ptr[g_instance.command.addr];
+        else
+            dest = (void *)(uintptr_t)g_instance.command.addr;
+
+        uint32_t k;
+        if(frame_width == SPI_TRANS_INT)
+        {
+            for(k = 0; k < frames; k++)
+                ((uint32_t *)dest)[k] = regs->dr[0];
+        } else if(frame_width == SPI_TRANS_SHORT)
+        {
+            for(k = 0; k < frames; k++)
+                ((uint16_t *)dest)[k] = regs->dr[0];
+        } else
+        {
+            for(k = 0; k < frames; k++)
+                ((uint8_t *)dest)[k] = regs->dr[0];
+        }
+    }
+
+    /* The callback runs whether or not an error was flagged. */
+    if(g_instance.callback != NULL)
+    {
+        g_instance.callback((void *)&g_instance.command);
+    }
+    spi_slave_idle_mode();
+}
+
+/*
+ * Interrupt handler registered on the slave's int pin (see spi_slave_config).
+ *
+ * While idle, a complete 8-byte command packet in the RX FIFO promotes the
+ * state to COMMAND. The handler for the resulting state is then run.
+ */
+static void spi_slave_cs_irq(void)
+{
+    volatile spi_t *spi_handle = spi[2];
+
+    if(g_instance.status == IDLE)
+    {
+        /*
+         * The packet is always 8 bytes, but the FIFO level counts frames.
+         * spi_slave_command_mode() pops 8 / 4, 8 / 2 or 8 frames depending on
+         * the frame width, so expect exactly that many here.
+         */
+        uint32_t command_frames;
+        switch(spi_get_frame_size(g_instance.data_bit_length - 1))
+        {
+            case SPI_TRANS_INT:
+                command_frames = 8 / 4;
+                break;
+            case SPI_TRANS_SHORT:
+                command_frames = 8 / 2;
+                break;
+            default:
+                command_frames = 8;
+                break;
+        }
+        if(spi_handle->rxflr == command_frames)
+            g_instance.status = COMMAND;
+    }
+
+    if(g_instance.status == IDLE)
+        spi_slave_idle_mode();
+    else if(g_instance.status == COMMAND)
+        spi_slave_command_mode();
+    else if(g_instance.status == TRANSFER)
+        spi_slave_transfer_mode();
+}
+
+/*
+ * Set up SPI device 2 as a slave in single-line mode.
+ *
+ * int_pin          GPIOHS pin configured as pulled-up input; its rising edge
+ *                  runs spi_slave_cs_irq.
+ * ready_pin        GPIOHS output pin, driven high here.
+ * dmac_channel     DMA channel stored for the block commands.
+ * data_bit_length  Frame length in bits.
+ * data, len        Config buffer and its length, stored as config_ptr/config_len.
+ * callback         Stored and invoked by spi_slave_transfer_mode() after a transfer.
+ */
+void spi_slave_config(uint8_t int_pin, uint8_t ready_pin, dmac_channel_number_t dmac_channel, size_t data_bit_length, uint8_t *data, uint32_t len, spi_slave_receive_callback_t callback)
+{
+    /* Caller-supplied settings. */
+    g_instance.config_ptr = data;
+    g_instance.config_len = len;
+    g_instance.data_bit_length = data_bit_length;
+    g_instance.int_pin = int_pin;
+    g_instance.ready_pin = ready_pin;
+    g_instance.dmac_channel = dmac_channel;
+    g_instance.callback = callback;
+
+    /* Fixed settings: ctrlr0 bit positions, single-line mode, idle state. */
+    g_instance.work_mode = 6;
+    g_instance.slv_oe = 10;
+    g_instance.dfs = 16;
+    g_instance.is_dual = 0;
+    g_instance.status = IDLE;
+
+    sysctl_reset(SYSCTL_RESET_SPI2);
+    sysctl_clock_enable(SYSCTL_CLOCK_SPI2);
+    sysctl_clock_set_threshold(SYSCTL_THRESHOLD_SPI2, 0);
+
+    volatile spi_t *regs = spi[2];
+    uint32_t bytes_per_frame = data_bit_length / 8;
+
+    /* All registers are programmed between ssienr = 0 and ssienr = 1. */
+    regs->ssienr = 0x00;
+    regs->ctrlr0 = (0x0 << g_instance.work_mode) | (0x1 << g_instance.slv_oe) | ((data_bit_length - 1) << g_instance.dfs);
+    regs->dmatdlr = 0x04;
+    regs->dmardlr = 0x03;
+    regs->dmacr = 0x00;
+    regs->txftlr = 0x00;
+    /* RX threshold: number of frames in an 8-byte command packet, minus one. */
+    regs->rxftlr = 0x08 / bytes_per_frame - 1;
+    regs->imr = 0x00;
+    regs->ssienr = 0x01;
+
+    gpiohs_set_drive_mode(g_instance.ready_pin, GPIO_DM_OUTPUT);
+    gpiohs_set_pin(g_instance.ready_pin, GPIO_PV_HIGH);
+
+    gpiohs_set_drive_mode(g_instance.int_pin, GPIO_DM_INPUT_PULL_UP);
+    gpiohs_set_pin_edge(g_instance.int_pin, GPIO_PE_RISING);
+    gpiohs_set_irq(g_instance.int_pin, 3, spi_slave_cs_irq);
+}
+
+/*
+ * Set up SPI device 2 as a slave in dual mode: performs the regular
+ * spi_slave_config() setup, then records the MOSI/MISO pins and sets the
+ * is_dual flag that the slave handlers check before switching pin functions.
+ */
+void spi_slave_dual_config(uint8_t int_pin,
+                           uint8_t ready_pin,
+                           uint8_t mosi_pin,
+                           uint8_t miso_pin,
+                           dmac_channel_number_t dmac_channel,
+                           size_t data_bit_length,
+                           uint8_t *data,
+                           uint32_t len,
+                           spi_slave_receive_callback_t callback)
+{
+    /* spi_slave_config() clears is_dual, so the flag is set only afterwards. */
+    spi_slave_config(int_pin, ready_pin, dmac_channel, data_bit_length, data, len, callback);
+
+    g_instance.is_dual = 1;
+    g_instance.mosi_pin = mosi_pin;
+    g_instance.miso_pin = miso_pin;
+}
+void spi_handle_data_dma(spi_device_num_t spi_num, spi_chip_select_t chip_select, spi_data_t data, plic_interrupt_t *cb)
+{
+    configASSERT(spi_num < SPI_DEVICE_MAX && spi_num != 2);
+    configASSERT(chip_select < SPI_CHIP_SELECT_MAX);
+    switch(data.transfer_mode)
+    {
+        case SPI_TMOD_TRANS_RECV:
+        case SPI_TMOD_EEROM:
+            configASSERT(data.tx_buf && data.tx_len && data.rx_buf && data.rx_len);
+            break;
+        case SPI_TMOD_TRANS:
+            configASSERT(data.tx_buf && data.tx_len);
+            break;
+        case SPI_TMOD_RECV:
+            configASSERT(data.rx_buf && data.rx_len);
+            break;
+        default:
+            configASSERT(!"Transfer Mode ERR");
+            break;
+    }
+    configASSERT(data.tx_channel < DMAC_CHANNEL_MAX && data.rx_channel < DMAC_CHANNEL_MAX);
+    volatile spi_t *spi_handle = spi[spi_num];
+
+    spinlock_lock(&g_spi_instance[spi_num].lock);
+    if(cb)
+    {
+        g_spi_instance[spi_num].spi_int_instance.callback = cb->callback;
+        g_spi_instance[spi_num].spi_int_instance.ctx = cb->ctx;
+    }
+    switch(data.transfer_mode)
+    {
+        case SPI_TMOD_RECV:
+            spi_set_tmod(spi_num, SPI_TMOD_RECV);
+            if(data.rx_len > 65536)
+                data.rx_len = 65536;
+            spi_handle->ctrlr1 = (uint32_t)(data.rx_len - 1);
+            spi_handle->dmacr = 0x03;
+            spi_handle->ssienr = 0x01;
+            if(spi_get_frame_format(spi_num) == SPI_FF_STANDARD)
+                spi_handle->dr[0] = 0xffffffff;
+            if(cb)
+            {
+                dmac_irq_register(data.rx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
+                g_spi_instance[spi_num].dmac_channel = data.rx_channel;
+            }
+            sysctl_dma_select((sysctl_dma_channel_t)data.rx_channel, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
+            dmac_set_single_mode(data.rx_channel, (void *)(&spi_handle->dr[0]), (void *)data.rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+                                 DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.rx_len);
+            spi_handle->ser = 1U << chip_select;
+            if(!cb)
+                dmac_wait_done(data.rx_channel);
+            break;
+        case SPI_TMOD_TRANS:
+            spi_set_tmod(spi_num, SPI_TMOD_TRANS);
+            spi_handle->dmacr = 0x2; /*enable dma transmit*/
+            spi_handle->ssienr = 0x01;
+
+            if(cb)
+            {
+                dmac_irq_register(data.tx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
+                g_spi_instance[spi_num].dmac_channel = data.tx_channel;
+            }
+            sysctl_dma_select(data.tx_channel, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
+            if(data.fill_mode)
+                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
+                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
+            else
+                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
+                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
+            spi_handle->ser = 1U << chip_select;
+            if(!cb)
+            {
+                dmac_wait_done(data.tx_channel);
+                while((spi_handle->sr & 0x05) != 0x04)
+                    ;
+            }
+            break;
+        case SPI_TMOD_EEROM:
+            spi_set_tmod(spi_num, SPI_TMOD_EEROM);
+            if(data.rx_len > 65536)
+                data.rx_len = 65536;
+            spi_handle->ctrlr1 = (uint32_t)(data.rx_len - 1);
+            spi_handle->dmacr = 0x3;
+            spi_handle->ssienr = 0x01;
+            sysctl_dma_select(data.tx_channel, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
+            if(data.fill_mode)
+                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
+                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
+            else
+                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
+                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
+            if(cb)
+            {
+                dmac_irq_register(data.rx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
+                g_spi_instance[spi_num].dmac_channel = data.rx_channel;
+            }
+            sysctl_dma_select(data.rx_channel, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
+            dmac_set_single_mode(data.rx_channel, (void *)(&spi_handle->dr[0]), (void *)data.rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+                                 DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.rx_len);
+            spi_handle->ser = 1U << chip_select;
+            if(!cb)
+            {
+                dmac_wait_done(data.tx_channel);
+                dmac_wait_done(data.rx_channel);
+            }
+
+            break;
+        case SPI_TMOD_TRANS_RECV:
+            spi_set_tmod(spi_num, SPI_TMOD_TRANS_RECV);
+            if(data.rx_len > 65536)
+                data.rx_len = 65536;
+
+            if(cb)
+            {
+                if(data.tx_len > data.rx_len)
+                {
+                    dmac_irq_register(data.tx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
+                    g_spi_instance[spi_num].dmac_channel = data.tx_channel;
+                } else
+                {
+                    dmac_irq_register(data.rx_channel, spi_dma_irq, &g_spi_instance[spi_num], cb->priority);
+                    g_spi_instance[spi_num].dmac_channel = data.rx_channel;
+                }
+            }
+            spi_handle->ctrlr1 = (uint32_t)(data.rx_len - 1);
+            spi_handle->dmacr = 0x3;
+            spi_handle->ssienr = 0x01;
+            sysctl_dma_select(data.tx_channel, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
+            if(data.fill_mode)
+                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_NOCHANGE, DMAC_ADDR_NOCHANGE,
+                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
+            else
+                dmac_set_single_mode(data.tx_channel, data.tx_buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
+                                     DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, data.tx_len);
+            sysctl_dma_select(data.rx_channel, SYSCTL_DMA_SELECT_SSI0_RX_REQ + spi_num * 2);
+            dmac_set_single_mode(data.rx_channel, (void *)(&spi_handle->dr[0]), (void *)data.rx_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+                                 DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, data.rx_len);
+            spi_handle->ser = 1U << chip_select;
+            if(!cb)
+            {
+                dmac_wait_done(data.tx_channel);
+                dmac_wait_done(data.rx_channel);
+            }
+            break;
+    }
+    if(!cb)
+    {
+        spinlock_unlock(&g_spi_instance[spi_num].lock);
+        spi_handle->ser = 0x00;
+        spi_handle->ssienr = 0x00;
+    }
+}

+ 11 - 0
lib/nncase/CMakeLists.txt

@@ -0,0 +1,11 @@
+# Header search paths used when compiling the nncase sources.
+include_directories(
+        ${SDK_ROOT}/third_party/xtl/include
+        ${CMAKE_CURRENT_LIST_DIR}/nncase/include
+        )
+
+# Collect every C and C++ source file found below this directory.
+file(GLOB_RECURSE NNCASE_SRC
+        "${CMAKE_CURRENT_LIST_DIR}/*.c"
+        "${CMAKE_CURRENT_LIST_DIR}/*.cpp"
+        )
+
+# Build the nncase library from the collected sources and add -O2 to its compile options.
+add_library(nncase ${NNCASE_SRC})
+target_compile_options(nncase PRIVATE -O2)

+ 12 - 1
lib/nncase/include/datatypes.h

@@ -157,7 +157,7 @@ inline bool operator!=(const padding &lhs, const padding &rhs) noexcept
     return lhs.before != rhs.before || lhs.after != rhs.after;
 }
 
-template<class T>
+template <class T>
 bool operator==(const value_range<T> &lhs, const value_range<T> &rhs) noexcept
 {
     return lhs.min == rhs.min && lhs.max == rhs.max;
@@ -168,4 +168,15 @@ bool operator!=(const value_range<T> &lhs, const value_range<T> &rhs) noexcept
 {
     return lhs.min != rhs.min || lhs.max != rhs.max;
 }
+
+#ifndef DEFINE_ENUM_FLAG_OPERATORS
+// Defines |, &, ^, ~ and the compound forms |=, &=, ^= for the flag enum ENUMTYPE.
+// Operands are combined as int and the result is converted back to ENUMTYPE.
+// The compound forms assign the combined value back through the enum lvalue itself;
+// they do not reinterpret the operand as `int &`, which would access sizeof(int) bytes
+// regardless of the enum's real underlying type.
+#define DEFINE_ENUM_FLAG_OPERATORS(ENUMTYPE)                                                               \
+    inline ENUMTYPE operator|(ENUMTYPE a, ENUMTYPE b) { return ENUMTYPE(((int)a) | ((int)b)); }            \
+    inline ENUMTYPE &operator|=(ENUMTYPE &a, ENUMTYPE b) { return a = ENUMTYPE(((int)a) | ((int)b)); }     \
+    inline ENUMTYPE operator&(ENUMTYPE a, ENUMTYPE b) { return ENUMTYPE(((int)a) & ((int)b)); }            \
+    inline ENUMTYPE &operator&=(ENUMTYPE &a, ENUMTYPE b) { return a = ENUMTYPE(((int)a) & ((int)b)); }     \
+    inline ENUMTYPE operator~(ENUMTYPE a) { return ENUMTYPE(~((int)a)); }                                  \
+    inline ENUMTYPE operator^(ENUMTYPE a, ENUMTYPE b) { return ENUMTYPE(((int)a) ^ ((int)b)); }            \
+    inline ENUMTYPE &operator^=(ENUMTYPE &a, ENUMTYPE b) { return a = ENUMTYPE(((int)a) ^ ((int)b)); }
+#endif
 }

+ 8 - 0
lib/nncase/include/kernels/kernel_utils.h

@@ -18,6 +18,14 @@
 #include <cstddef>
 #include <datatypes.h>
 
+// CXX_RESTRICT: compiler-specific spelling of the no-alias pointer qualifier.
+// It is only an optimization hint, so compilers without a known spelling get an
+// empty definition instead of a token that would fail to compile.
+#if defined(__GNUC__)
+#define CXX_RESTRICT __restrict__
+#elif defined(_MSC_VER)
+#define CXX_RESTRICT __restrict
+#else
+#define CXX_RESTRICT
+#endif
+
 namespace nncase
 {
 namespace kernels

+ 74 - 29
lib/nncase/include/kernels/neutral/neutral_kernels.h

@@ -17,6 +17,9 @@
 #include <cmath>
 #include <runtime/runtime_op_utility.h>
 #include <xtl/xspan.hpp>
+#ifdef __riscv
+#include "../riscv/neutral_kernels.h"
+#endif
 
 namespace nncase
 {
@@ -28,20 +31,35 @@ namespace kernels
         void binary(const float *input_a, const float *input_b, float *output, const runtime_shape_t &in_a_shape,
             const runtime_shape_t &in_b_shape, const runtime_shape_t &out_shape, const value_range<float> &fused_activation, TOp &&op)
         {
-            for (int32_t d0 = 0; d0 < out_shape[0]; d0++)
+            // opt. no broadcast
+            if (in_a_shape == in_b_shape)
             {
-                for (int32_t d1 = 0; d1 < out_shape[1]; d1++)
+                auto size = kernels::details::compute_size(in_a_shape);
+                for (size_t i = 0; i < size; i++)
                 {
-                    for (int32_t d2 = 0; d2 < out_shape[2]; d2++)
+                    const auto a = input_a[i];
+                    const auto b = input_b[i];
+                    output[i] = kernels::details::apply_activation(op(a, b), fused_activation);
+                }
+            }
+            // fallback
+            else
+            {
+                for (int32_t d0 = 0; d0 < out_shape[0]; d0++)
+                {
+                    for (int32_t d1 = 0; d1 < out_shape[1]; d1++)
                     {
-                        for (int32_t d3 = 0; d3 < out_shape[3]; d3++)
+                        for (int32_t d2 = 0; d2 < out_shape[2]; d2++)
                         {
-                            runtime_shape_t in_off = { d0, d1, d2, d3 };
-                            const auto in_a_off = kernels::details::get_reduced_offset(in_off, in_a_shape);
-                            const auto in_b_off = kernels::details::get_reduced_offset(in_off, in_b_shape);
-                            const auto a = input_a[offset(in_a_shape, in_a_off)];
-                            const auto b = input_b[offset(in_b_shape, in_b_off)];
-                            output[offset(out_shape, in_off)] = kernels::details::apply_activation(op(a, b), fused_activation);
+                            for (int32_t d3 = 0; d3 < out_shape[3]; d3++)
+                            {
+                                runtime_shape_t in_off = { d0, d1, d2, d3 };
+                                const auto in_a_off = kernels::details::get_reduced_offset(in_off, in_a_shape);
+                                const auto in_b_off = kernels::details::get_reduced_offset(in_off, in_b_shape);
+                                const auto a = input_a[offset(in_a_shape, in_a_off)];
+                                const auto b = input_b[offset(in_b_shape, in_b_off)];
+                                output[offset(out_shape, in_off)] = kernels::details::apply_activation(op(a, b), fused_activation);
+                            }
                         }
                     }
                 }
@@ -53,24 +71,43 @@ namespace kernels
             const runtime_shape_t &in_b_shape, const runtime_shape_t &out_shape, int32_t input_a_offset, int32_t input_a_mul, int32_t input_a_shift,
             int32_t input_b_offset, int32_t input_b_mul, int32_t input_b_shift, int32_t output_mul, int32_t output_shift, int32_t output_offset, TOp &&op)
         {
-            for (int32_t d0 = 0; d0 < out_shape[0]; d0++)
+            // opt. no broadcast
+            if (in_a_shape == in_b_shape)
             {
-                for (int32_t d1 = 0; d1 < out_shape[1]; d1++)
+                auto size = kernels::details::compute_size(in_a_shape);
+                for (size_t i = 0; i < size; i++)
                 {
-                    for (int32_t d2 = 0; d2 < out_shape[2]; d2++)
+                    auto a = (int32_t)input_a[i];
+                    auto b = (int32_t)input_b[i];
+                    a = runtime::mul_and_carry_shift(a + input_a_offset, input_a_mul, input_a_shift);
+                    b = runtime::mul_and_carry_shift(b + input_b_offset, input_b_mul, input_b_shift);
+
+                    auto output_val = runtime::mul_and_carry_shift(op(a, b), output_mul, output_shift);
+                    output[i] = (uint8_t)std::clamp(output_val + output_offset, 0, 255);
+                }
+            }
+            // fallback
+            else
+            {
+                for (int32_t d0 = 0; d0 < out_shape[0]; d0++)
+                {
+                    for (int32_t d1 = 0; d1 < out_shape[1]; d1++)
                     {
-                        for (int32_t d3 = 0; d3 < out_shape[3]; d3++)
+                        for (int32_t d2 = 0; d2 < out_shape[2]; d2++)
                         {
-                            runtime_shape_t in_off = { d0, d1, d2, d3 };
-                            const auto in_a_off = kernels::details::get_reduced_offset(in_off, in_a_shape);
-                            const auto in_b_off = kernels::details::get_reduced_offset(in_off, in_b_shape);
-                            auto a = (int32_t)input_a[offset(in_a_shape, in_a_off)];
-                            auto b = (int32_t)input_b[offset(in_b_shape, in_b_off)];
-                            a = runtime::mul_and_carry_shift(a + input_a_offset, input_a_mul, input_a_shift);
-                            b = runtime::mul_and_carry_shift(b + input_b_offset, input_b_mul, input_b_shift);
-
-                            auto output_val = runtime::mul_and_carry_shift(op(a, b), output_mul, output_shift);
-                            output[offset(out_shape, in_off)] = (uint8_t)std::clamp(output_val + output_offset, 0, 255);
+                            for (int32_t d3 = 0; d3 < out_shape[3]; d3++)
+                            {
+                                runtime_shape_t in_off = { d0, d1, d2, d3 };
+                                const auto in_a_off = kernels::details::get_reduced_offset(in_off, in_a_shape);
+                                const auto in_b_off = kernels::details::get_reduced_offset(in_off, in_b_shape);
+                                auto a = (int32_t)input_a[offset(in_a_shape, in_a_off)];
+                                auto b = (int32_t)input_b[offset(in_b_shape, in_b_off)];
+                                a = runtime::mul_and_carry_shift(a + input_a_offset, input_a_mul, input_a_shift);
+                                b = runtime::mul_and_carry_shift(b + input_b_offset, input_b_mul, input_b_shift);
+
+                                auto output_val = runtime::mul_and_carry_shift(op(a, b), output_mul, output_shift);
+                                output[offset(out_shape, in_off)] = (uint8_t)std::clamp(output_val + output_offset, 0, 255);
+                            }
                         }
                     }
                 }
@@ -284,14 +321,18 @@ namespace kernels
         }
 
         template <class TQ>
-        void dequantize(const TQ *input, float *output, size_t count, const quant_param_t &param)
+        void dequantize(const TQ *CXX_RESTRICT input, float *CXX_RESTRICT output, size_t count, const quant_param_t &param)
         {
+#if __riscv
+            riscv_dequantize(input, output, count, param);
+#else
             float div = 1.f / param.scale;
 
             for (size_t i = 0; i < count; i++)
             {
                 output[i] = (input[i] - param.zero_point) * div;
             }
+#endif
         }
 
         inline void matmul(const float *input_a, const float *input_b, float *output, const float *bias, int32_t a_rows, int32_t a_cols, int32_t b_cols, const value_range<float> &fused_activation)
@@ -377,13 +418,17 @@ namespace kernels
         }
 
         template <class TQ>
-        void quantize(const float *input, TQ *output, size_t count, const quant_param_t &param)
+        void quantize(const float *CXX_RESTRICT input, TQ *CXX_RESTRICT output, size_t count, const quant_param_t &param)
         {
+#if __riscv
+            riscv_quantize(input, output, count, param);
+#else
             for (size_t i = 0; i < count; i++)
             {
                 int32_t tmp = (int32_t)roundf(input[i] * param.scale + param.zero_point);
                 output[i] = std::clamp(tmp, (int32_t)std::numeric_limits<TQ>::lowest(), (int32_t)std::numeric_limits<TQ>::max());
             }
+#endif
         }
 
         template <class TReducer>
@@ -411,7 +456,7 @@ namespace kernels
         }
 
         template <class TOp>
-        void unary(const float *input, float *output, size_t count, TOp &&op)
+        void unary(const float *CXX_RESTRICT input, float *CXX_RESTRICT output, size_t count, TOp &&op)
         {
             for (size_t i = 0; i < count; i++)
                 output[i] = op(input[i]);
@@ -564,7 +609,7 @@ namespace kernels
         }
 
         template <class T>
-        void transpose(const T *input, T *output, const runtime_shape_t &in_shape, const runtime_shape_t &perm)
+        void transpose(const T *CXX_RESTRICT input, T *CXX_RESTRICT output, const runtime_shape_t &in_shape, const runtime_shape_t &perm)
         {
             runtime_shape_t out_shape;
             for (size_t i = 0; i < 4; i++)
@@ -591,7 +636,7 @@ namespace kernels
         }
 
         template <class T>
-        void strided_slice(const T *input, T *output, const runtime_shape_t &in_shape, const runtime_shape_t &begin, const runtime_shape_t &end, const runtime_shape_t &strides)
+        void strided_slice(const T *CXX_RESTRICT input, T *CXX_RESTRICT output, const runtime_shape_t &in_shape, const runtime_shape_t &begin, const runtime_shape_t &end, const runtime_shape_t &strides)
         {
             auto loop_cond = [](int32_t i, int32_t stop, int32_t stride) {
                 return stride > 0 ? i < stop : i > stop;

+ 83 - 0
lib/nncase/include/kernels/riscv/neutral_kernels.h

@@ -0,0 +1,83 @@
+/* Copyright 2019 Canaan Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include "../kernel_utils.h"
+#include <cmath>
+#include <runtime/runtime_op_utility.h>
+#include <xtl/xspan.hpp>
+
+namespace nncase
+{
+namespace kernels
+{
+    namespace neutral
+    {
+        // Converts `count` quantized values from `input` into floats in `output`.
+        // Each result is value * inv_scale + bias, where inv_scale = 1 / param.scale and
+        // bias = -param.zero_point * inv_scale.
+        template <class TQ>
+        void riscv_dequantize(const TQ *CXX_RESTRICT input, float *CXX_RESTRICT output, size_t count, const quant_param_t &param)
+        {
+            const float inv_scale = 1.f / param.scale;
+            const float bias = -param.zero_point * inv_scale;
+            // Number of elements covered by whole pairs (count rounded down to even).
+            const size_t paired = count - count % 2;
+
+            // Two elements per iteration, with the load / convert / compute / store stages
+            // kept separate: hand-pipelined for an in-order CPU.
+            for (size_t pos = 0; pos < paired; pos += 2)
+            {
+                const auto q_lo = input[pos];
+                const auto q_hi = input[pos + 1];
+                const auto f_lo = (float)q_lo;
+                const auto f_hi = (float)q_hi;
+                const auto r_lo = f_lo * inv_scale + bias;
+                const auto r_hi = f_hi * inv_scale + bias;
+
+                output[pos] = r_lo;
+                output[pos + 1] = r_hi;
+            }
+
+            // Odd count: exactly one element remains after the pairs.
+            if (paired != count)
+                output[paired] = input[paired] * inv_scale + bias;
+        }
+
+        // Quantizes `count` floats from `input` into `output`: each value is multiplied by
+        // param.scale, offset by param.zero_point, converted to int32 and clamped to the
+        // representable range of TQ.
+        // Every element, including the odd trailing one, goes through the same
+        // `fcvt.w.s ..., rne` conversion, so a given input value quantizes to the same
+        // result regardless of its position in the buffer.
+        template <class TQ>
+        void riscv_quantize(const float *CXX_RESTRICT input, TQ *CXX_RESTRICT output, size_t count, const quant_param_t &param)
+        {
+            const float scale = param.scale;
+            const float zero = param.zero_point;
+            const auto q_min = (int32_t)std::numeric_limits<TQ>::lowest();
+            const auto q_max = (int32_t)std::numeric_limits<TQ>::max();
+
+            // float -> int32 with the round-to-nearest, ties-to-even rounding mode.
+            const auto to_int32_rne = [](float value) {
+                int32_t result;
+                asm volatile("fcvt.w.s %0, %1, rne"
+                             : "=r"(result)
+                             : "f"(value));
+                return result;
+            };
+
+            for (size_t i = 0; i < count / 2; i++)
+            {
+                auto in1 = input[i * 2];
+                auto in2 = input[i * 2 + 1];
+                in1 = in1 * scale + zero;
+                in2 = in2 * scale + zero;
+                const int32_t out1 = to_int32_rne(in1);
+                const int32_t out2 = to_int32_rne(in2);
+
+                output[i * 2] = std::clamp(out1, q_min, q_max);
+                output[i * 2 + 1] = std::clamp(out2, q_min, q_max);
+            }
+
+            // Odd count: convert the last element with the same rounding as the pairs above.
+            if (count % 2)
+            {
+                const int32_t out = to_int32_rne(input[count - 1] * scale + zero);
+                output[count - 1] = std::clamp(out, q_min, q_max);
+            }
+        }
+    }
+}
+}

+ 2 - 1
lib/nncase/include/runtime/interpreter.h

@@ -31,9 +31,9 @@ namespace runtime
 
     class interpreter_base
     {
+    public:
         using clock_t = std::chrono::system_clock;
 
-    public:
         bool try_load_model(const uint8_t *buffer);
         uint32_t model_size(const uint8_t *buffer);
 
@@ -59,6 +59,7 @@ namespace runtime
     protected:
         virtual bool initialize();
         virtual xtl::span<uint8_t> memory_at(const memory_range &range) const noexcept;
+        virtual clock_t::time_point get_now() const noexcept;
 
     private:
         void step();

+ 1 - 0
lib/nncase/include/runtime/k210/interpreter.h

@@ -40,6 +40,7 @@ namespace runtime
             dmac_channel_number_t dma_ch() const noexcept { return dma_ch_; }
             void dma_ch(dmac_channel_number_t dma_ch) noexcept { dma_ch_ = dma_ch; }
             k210_interpreter_context &context() noexcept { return context_; }
+            clock_t::time_point get_now() const noexcept override;
 #endif
 
         protected:

+ 23 - 0
lib/nncase/include/runtime/k210/k210_runtime_op_utility.h

@@ -80,6 +80,29 @@ namespace runtime
             }
         }
 
+        // Returns the padding pair associated with a KPU pooling mode:
+        // { 0, 1 } for kpu_pool_mean_2_s1 and kpu_pool_max_2_s1, { 0, 0 } for bypass and
+        // the other listed modes. Any other value is reported through NNCASE_THROW as a
+        // std::runtime_error. `size` is accepted but not used.
+        inline std::array<int32_t, 2> get_kpu_padding(kpu_pool_type_t filter, int32_t size)
+        {
+            switch (filter)
+            {
+            // The only modes with a non-zero second padding value.
+            case kpu_pool_mean_2_s1:
+            case kpu_pool_max_2_s1:
+                return { 0, 1 };
+            // Bypass and the s2 / s4 modes need no padding.
+            case kpu_pool_bypass:
+            case kpu_pool_max_2_s2:
+            case kpu_pool_mean_2_s2:
+            case kpu_pool_left_top_2_s2:
+            case kpu_pool_right_top_2_s2:
+            case kpu_pool_max_4_s4:
+            case kpu_pool_mean_4_s4:
+            case kpu_pool_left_top_4_s4:
+                return { 0, 0 };
+            default:
+                NNCASE_THROW(std::runtime_error, "Invalid kpu pool type");
+            }
+        }
+
         inline int32_t get_kpu_rows(int32_t width, int32_t height, int32_t channels)
         {
             auto layout = get_kpu_row_layout(width);

+ 20 - 20
lib/nncase/include/runtime/runtime_op.def

@@ -1,24 +1,24 @@
 BEGINE_DEFINE_TARGET(neutral)
-    DEFINE_NEUTRAL_RUNTIME_OP(binary,				Binary,				0x0)
-    DEFINE_NEUTRAL_RUNTIME_OP(concat,				Concat,				0x1)
-    DEFINE_NEUTRAL_RUNTIME_OP(conv2d,				Conv2D,				0x2)
-    DEFINE_NEUTRAL_RUNTIME_OP(dequantize,			Dequantize,			0x3)
-    DEFINE_NEUTRAL_RUNTIME_OP(matmul,				MatMul,				0x4)
-    DEFINE_NEUTRAL_RUNTIME_OP(pad,					Pad,				0x5)
-    DEFINE_NEUTRAL_RUNTIME_OP(quantize,				Quantize,			0x6)
-    DEFINE_NEUTRAL_RUNTIME_OP(reduce,				Reduce,				0x7)
-    DEFINE_NEUTRAL_RUNTIME_OP(reduce_window2d,		ReduceWindow2D,		0x8)
-    DEFINE_NEUTRAL_RUNTIME_OP(memory_copy,			MemoryCopy,			0x9)
-    DEFINE_NEUTRAL_RUNTIME_OP(resize_image,			ResizeImage,		0x0A)
-    DEFINE_NEUTRAL_RUNTIME_OP(softmax,				Softmax,			0x0B)
-    DEFINE_NEUTRAL_RUNTIME_OP(transpose,			Transpose,			0x0C)
-    DEFINE_NEUTRAL_RUNTIME_OP(strided_slice,		StridedSlice,		0x0D)
-    DEFINE_NEUTRAL_RUNTIME_OP(unary,				Unary,				0x0E)
-    DEFINE_NEUTRAL_RUNTIME_OP(quantized_conv2d,		QuantizedConv2D,	0x0F)
-    DEFINE_NEUTRAL_RUNTIME_OP(quantized_matmul,		QuantizedMatMul,	0x10)
-    DEFINE_NEUTRAL_RUNTIME_OP(quantized_binary,		QuantizedBinary,	0x11)
-    // DEFINE_NEUTRAL_RUNTIME_OP(table_lookup1d,	TableLookup1D,		0x12)
-    DEFINE_NEUTRAL_RUNTIME_OP(conv2d_transpose,		QuantizedBinary,	0x13)
+    DEFINE_NEUTRAL_RUNTIME_OP(binary,               Binary,             0x0)
+    DEFINE_NEUTRAL_RUNTIME_OP(concat,               Concat,             0x1)
+    DEFINE_NEUTRAL_RUNTIME_OP(conv2d,               Conv2D,             0x2)
+    DEFINE_NEUTRAL_RUNTIME_OP(dequantize,           Dequantize,         0x3)
+    DEFINE_NEUTRAL_RUNTIME_OP(matmul,               MatMul,             0x4)
+    DEFINE_NEUTRAL_RUNTIME_OP(pad,                  Pad,                0x5)
+    DEFINE_NEUTRAL_RUNTIME_OP(quantize,             Quantize,           0x6)
+    DEFINE_NEUTRAL_RUNTIME_OP(reduce,               Reduce,             0x7)
+    DEFINE_NEUTRAL_RUNTIME_OP(reduce_window2d,      ReduceWindow2D,     0x8)
+    DEFINE_NEUTRAL_RUNTIME_OP(memory_copy,          MemoryCopy,         0x9)
+    DEFINE_NEUTRAL_RUNTIME_OP(resize_image,         ResizeImage,        0x0A)
+    DEFINE_NEUTRAL_RUNTIME_OP(softmax,              Softmax,            0x0B)
+    DEFINE_NEUTRAL_RUNTIME_OP(transpose,            Transpose,          0x0C)
+    DEFINE_NEUTRAL_RUNTIME_OP(strided_slice,        StridedSlice,       0x0D)
+    DEFINE_NEUTRAL_RUNTIME_OP(unary,                Unary,              0x0E)
+    DEFINE_NEUTRAL_RUNTIME_OP(quantized_conv2d,     QuantizedConv2D,    0x0F)
+    DEFINE_NEUTRAL_RUNTIME_OP(quantized_matmul,     QuantizedMatMul,    0x10)
+    DEFINE_NEUTRAL_RUNTIME_OP(quantized_binary,     QuantizedBinary,    0x11)
+    // DEFINE_NEUTRAL_RUNTIME_OP(table_lookup1d,    TableLookup1D,      0x12)
+    DEFINE_NEUTRAL_RUNTIME_OP(conv2d_transpose,     Conv2DTranspose,    0x13) // name follows the op id instead of repeating QuantizedBinary
 END_DEFINE_TARGET()
 
 // CPU

+ 6 - 12
lib/nncase/include/runtime/runtime_op_utility.h

@@ -99,20 +99,14 @@ namespace runtime
             }
             else
             {
-                value >>= shift - 1;
-                if (value & 0x1)
-                {
-                    if (value < 0)
-                        value = (value >> 1) - 1;
-                    else
-                        value = (value >> 1) + 1;
-                }
-                else
-                {
-                    value >>= 1;
-                }
+                value += 1 << (shift - 1);
+                value >>= shift;
             }
         }
+        else if (shift < 0)
+        {
+            value = value << (-shift);
+        }
 
         return value;
     }

+ 10 - 8
lib/nncase/include/targets/target.h

@@ -13,10 +13,11 @@
  * limitations under the License.
  */
 #pragma once
-#include <ir/quantizer.h>
+#include <hlir/quantizer.h>
+#include <hlir/transforms/pass.h>
+#include <llir/transforms/pass.h>
 #include <memory>
 #include <scheduler/memory_allocator.h>
-#include <transforms/transform.h>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
@@ -33,16 +34,17 @@ class target
 public:
     target(const target_options &options)
         : options_(options) {}
+    virtual ~target() = default;
 
     virtual void fill_allocators(std::unordered_map<memory_type_t, scheduler::memory_allocator *> &allocators, std::vector<std::unique_ptr<scheduler::memory_allocator>> &allocator_holders) = 0;
     virtual void registry_codegen_ops() = 0;
     virtual void registry_evaluator_ops() = 0;
-    virtual void add_default_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
-    virtual void add_optimize1_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
-    virtual void add_optimize2_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
-    virtual void add_quantization_checkpoint_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
-    virtual void add_quantization_transforms(ir::quantizer &quantizer, std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
-    virtual void add_quantization_broadcast(std::unordered_set<ir::node_opcode> &opcodes) = 0;
+    virtual void optimize_target_independent(hlir::transforms::pass_manager &pass_mgr) = 0;
+    virtual void optimize_target_dependent(hlir::transforms::pass_manager &pass_mgr) = 0;
+    virtual void add_quantization_checkpoints(hlir::transforms::pass_manager &pass_mgr) = 0;
+    virtual void optimize_quantize(hlir::quantizer &quantizer, hlir::transforms::pass_manager &pass_mgr) = 0;
+    virtual void add_quantization_broadcast(std::unordered_set<hlir::node_opcode> &opcodes) = 0;
+    virtual void optimize_llir(llir::transforms::pass_manager &pass_mgr) = 0;
 
 protected:
     target_options options_;

+ 9 - 10
lib/nncase/nncase.cpp

@@ -17,6 +17,7 @@
 #include <runtime/target_interpreter.h>
 #include <stdio.h>
 #include <cstring>
+#include <utils.h>
 
 using namespace nncase;
 using namespace nncase::runtime;
@@ -27,6 +28,12 @@ namespace
 {
 void kpu_upload_dma(dmac_channel_number_t dma_ch, const uint8_t *src, uint8_t *dest, size_t input_size, plic_irq_callback_t callback, void *userdata)
 {
+    if (is_memory_cache((uintptr_t)src))
+    {
+        std::copy_n(src, input_size, dest);
+        src -= 0x40000000;
+    }
+
     dmac_set_irq(dma_ch, callback, userdata, 1);
     dmac_set_single_mode(dma_ch, (void *)src, (void *)dest, DMAC_ADDR_INCREMENT, DMAC_ADDR_INCREMENT,
         DMAC_MSIZE_16, DMAC_TRANS_WIDTH_64, input_size / 8);
@@ -85,16 +92,8 @@ public:
         else if (input.memory_type == mem_k210_kpu)
         {
             auto shape = interpreter_.input_shape_at(0);
-            if (shape[3] % 64 == 0)
-            {
-                kpu_upload_dma(dma_ch, src, mem.data(), mem.size(), nullptr, this);
-                on_upload_done();
-            }
-            else
-            {
-                kernels::k210::kpu_upload(src, mem.data(), shape);
-                on_upload_done();
-            }
+            kernels::k210::kpu_upload(src, mem.data(), shape);
+            on_upload_done();
 
             return 0;
         }

+ 14 - 5
lib/nncase/runtime/interpreter.cpp

@@ -53,9 +53,10 @@ bool interpreter_base::try_load_model(const uint8_t *buffer)
 uint32_t interpreter_base::model_size(const uint8_t *buffer)
 {
     uint32_t size = (uint32_t)(node_body_start_ - buffer);
-    for(int i=0; i<nodes_size(); i++)
+    for (int i = 0; i < nodes_size(); i++)
     {
-        struct node_header cnt_layer_header = node_headers_[i];;
+        struct node_header cnt_layer_header = node_headers_[i];
+        ;
         size += cnt_layer_header.body_size;
     }
     return size;
@@ -79,6 +80,11 @@ void interpreter_base::run(run_callback_t callback, error_callback_t on_error, n
     step();
 }
 
+// Default time source for the interpreter: the current reading of clock_t.
+// Declared virtual in the class, so derived interpreters can supply their own timer.
+interpreter_base::clock_t::time_point interpreter_base::get_now() const noexcept
+{
+    const auto current = clock_t::now();
+    return current;
+}
+
 void interpreter_base::step()
 {
     auto result = kcr_done;
@@ -87,17 +93,20 @@ void interpreter_base::step()
     {
         if (!last_time_)
         {
-            last_time_ = clock_t::now();
+            last_time_ = get_now();
         }
         else
         {
-            auto now = clock_t::now();
+            auto now = get_now();
             auto duration = now - *last_time_;
             total_duration_ += duration;
-            last_time_ = now;
 
             if (node_profile_)
+            {
                 node_profile_(last_op_, duration, userdata_);
+                now = get_now();
+                last_time_ = now;
+            }
         }
 
         if (cnt_node_ == nodes_size())

+ 11 - 0
lib/nncase/runtime/k210/interpreter.cpp

@@ -13,6 +13,9 @@
  * limitations under the License.
  */
 #include <runtime/k210/interpreter.h>
+#if !NNCASE_TARGET_K210_SIMULATOR
+#include <sysctl.h>
+#endif
 
 using namespace nncase;
 using namespace nncase::runtime;
@@ -48,3 +51,11 @@ xtl::span<uint8_t> interpreter::memory_at(const memory_range &range) const noexc
 
     return interpreter_base::memory_at(range);
 }
+
+#if !NNCASE_TARGET_K210_SIMULATOR
+// Time source override: builds a clock_t::time_point from the microsecond count
+// returned by sysctl_get_time_us().
+interpreter::clock_t::time_point interpreter::get_now() const noexcept
+{
+    const std::chrono::microseconds elapsed_us(sysctl_get_time_us());
+    const auto ticks = std::chrono::duration_cast<clock_t::duration>(elapsed_us);
+    return clock_t::time_point(ticks);
+}
+#endif

+ 41 - 20
lib/nncase/runtime/k210/k210_ops.cpp

@@ -112,16 +112,7 @@ namespace runtime
         {
             auto input = interpreter.memory_at<uint8_t>(options.input);
             auto output = interpreter.memory_at<uint8_t>(options.output);
-#if !NNCASE_TARGET_K210_SIMULATOR
-            if (options.in_shape[3] % 64 == 0)
-            {
-                auto &ctx = interpreter.context();
-                ctx.interpreter = &interpreter;
-                ctx.step = step;
-                kpu_upload_dma(interpreter.dma_ch(), input.data(), output.data(), input.size(), kpu_dma_plic_thunk, &ctx);
-                return kcr_done;
-            }
-#endif
+
             kernels::k210::kpu_upload(input.data(), output.data(), options.in_shape);
             return kcr_done;
         }
@@ -155,6 +146,15 @@ namespace runtime
             auto conv_output_tmp = std::make_unique<uint8_t[]>(conv_out_fmap_size);
             auto output_tmp = std::make_unique<uint8_t[]>(out_fmap_size);
 
+            auto batch = in_shape[0];
+            auto in_size_per_batch = kernels::details::compute_size(in_shape) / batch;
+            auto conv_output_tmp_size_per_batch = conv_out_fmap_size / batch;
+            auto out_size_per_batch = kernels::details::compute_size(out_shape) / batch;
+            auto p_input = input_tmp.get();
+            auto p_workspace = workspace.get();
+            auto p_conv_ouput_tmp = conv_output_tmp.get();
+            auto p_output_tmp = output_tmp.get();
+
             kernels::k210::kpu_download(input.data(), input_tmp.get(), in_shape);
             auto filter_size = get_kpu_filter_size((kpu_filter_type_t)options.layer.kernel_pool_type_cfg.data.kernel_type);
             auto pad_value = (uint8_t)options.layer.kernel_pool_type_cfg.data.pad_value;
@@ -191,15 +191,24 @@ namespace runtime
 
 #define KPU_CONV2D_IMPL(is_depthwise_val, filter_size_val)                                                                                        \
     if (is_depthwise == is_depthwise_val && filter_size == filter_size_val)                                                                       \
-    kernels::k210::kpu_conv2d<is_depthwise_val, filter_size_val>(input_tmp.get(), workspace.get(), conv_output_tmp.get(), options.weights.data(), \
+    kernels::k210::kpu_conv2d<is_depthwise_val, filter_size_val>(p_input, p_workspace, p_conv_ouput_tmp, options.weights.data(), \
         in_h, in_w, in_ch, out_ch, pad_value, arg_x, shift_x, arg_w, shift_w, arg_add, batchnorm.get(), activation)
 
-            KPU_CONV2D_IMPL(true, 1);
-            else KPU_CONV2D_IMPL(true, 3);
-            else KPU_CONV2D_IMPL(false, 1);
-            else KPU_CONV2D_IMPL(false, 3);
+            for (size_t n = 0; n < batch; n++)
+            {
+                KPU_CONV2D_IMPL(true, 1);
+                else KPU_CONV2D_IMPL(true, 3);
+                else KPU_CONV2D_IMPL(false, 1);
+                else KPU_CONV2D_IMPL(false, 3);
+
+                kernels::k210::kpu_pool2d(p_conv_ouput_tmp, p_output_tmp, in_h, in_w, out_ch, (kpu_pool_type_t)options.layer.kernel_pool_type_cfg.data.pool_type);
+
+                p_input += in_size_per_batch;
+                p_workspace += conv_output_tmp_size_per_batch;
+                p_conv_ouput_tmp += conv_output_tmp_size_per_batch;
+                p_output_tmp += out_size_per_batch;
+            }
 
-            kernels::k210::kpu_pool2d(conv_output_tmp.get(), output_tmp.get(), in_h, in_w, out_ch, (kpu_pool_type_t)options.layer.kernel_pool_type_cfg.data.pool_type);
             kernels::k210::kpu_upload(output_tmp.get(), kpu_out.data(), out_shape);
             if (options.main_mem_output.size)
             {
@@ -212,11 +221,23 @@ namespace runtime
             auto &ctx = interpreter.context();
             ctx.interpreter = &interpreter;
             ctx.step = step;
-            g_ai_done = 0;
 
-            kpu_conv2d_normal(options.layer, kpu_plic_thunk, &ctx);
-            while (!g_ai_done);
-            
+            auto batch = options.batches;
+            auto in_per_batch = get_kpu_rows(in_w, in_h, in_ch);
+            auto out_per_batch = get_kpu_rows(out_w, out_h, out_ch);
+
+            for (size_t n = 0; n < batch; n++)
+            {
+                g_ai_done = 0;
+
+                kpu_conv2d_normal(options.layer, kpu_plic_thunk, &ctx);
+                while (!g_ai_done)
+                    ;
+
+                options.layer.image_addr.data.image_src_addr += in_per_batch;
+                options.layer.image_addr.data.image_dst_addr += out_per_batch;
+            }
+
             if (options.main_mem_output.size)
             {
                 auto main_output = interpreter.memory_at<uint8_t>(options.main_mem_output);