RT-Thread
/
rt-thread
zrcadlo https://github-proxy.rt-thread.io/RT-Thread/rt-thread.git


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994
							/*
 * Copyright (c) 2006-2025 RT-Thread Development Team
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2021-01-30     lizhirui     first version
 * 2022-12-13     WangXiaoyao  Port to new mm
 * 2023-10-12     Shell        Add permission control API
 */

#include <rtthread.h>
#include <stddef.h>
#include <stdint.h>

#define DBG_TAG "hw.mmu"
#define DBG_LVL DBG_INFO
#include <rtdbg.h>

#include <board.h>
#include <cache.h>
#include <mm_aspace.h>
#include <mm_page.h>
#include <mmu.h>
#include <riscv_mmu.h>
#include <tlb.h>

#ifdef RT_USING_SMART
#include <board.h>
#include <ioremap.h>
#include <lwp_user_mm.h>
#endif

#ifndef RT_USING_SMART
#define USER_VADDR_START 0
#endif

static size_t _unmap_area(struct rt_aspace *aspace, void *v_addr);

/*
 * Early page table storage: one struct page_table is an array of
 * unsigned long entries that is exactly ARCH_PAGE_SIZE bytes large.
 */
struct page_table
{
    unsigned long page[ARCH_PAGE_SIZE / sizeof(unsigned long)];
};
/* Base of the page pool indexed by get_free_page(); installed by set_free_page(). */
static struct page_table *__init_page_array;

/*
 * Page table recorded by rt_hw_aspace_switch() and reported by
 * rt_hw_mmu_tbl_get(): a single pointer on uniprocessor builds, one slot per
 * CPU (indexed by rt_hw_cpu_id()) when RT_USING_SMP is enabled.
 */
#ifndef RT_USING_SMP
static void *current_mmu_table = RT_NULL;
#else
static void *current_mmu_table[RT_CPUS_NR] = { RT_NULL };
#endif /* RT_USING_SMP */

/* 4 KiB-aligned table storage: __SIZE(VPN2_BIT) entries for each of the RT_CPUS_NR CPUs. */
volatile __attribute__((aligned(4 * 1024)))
rt_ubase_t MMUTable[__SIZE(VPN2_BIT) * RT_CPUS_NR];

/**
 * @brief Switch the current address space to the specified one.
 *
 * This function is responsible for switching the address space by updating the page table
 * and related hardware state. The behavior depends on whether the architecture supports
 * Address Space Identifiers (ASIDs), which is selected at build time by the ARCH_USING_ASID macro.
 *
 * @param aspace Pointer to the address space structure containing the new page table.
 *
 * @note If ASID is supported (`ARCH_USING_ASID` is defined), the function will call
 *       `rt_hw_asid_switch_pgtbl` to switch the page table and update the ASID.
 *       Otherwise, it will directly write the `satp` CSR to switch the page table
 *       and invalidate the TLB.
 */
#ifdef ARCH_USING_ASID
void rt_hw_aspace_switch(rt_aspace_t aspace)
{
    /* The page-table switch is handed the physical address of the root table. */
    uintptr_t page_table = (uintptr_t)rt_kmem_v2p(aspace->page_table);

    /*
     * Record the table for rt_hw_mmu_tbl_get(). On SMP builds
     * current_mmu_table is a per-CPU array, so the slot of the calling CPU
     * must be written; assigning to the array itself does not compile.
     */
#ifndef RT_USING_SMP
    current_mmu_table = aspace->page_table;
#else
    current_mmu_table[rt_hw_cpu_id()] = aspace->page_table;
#endif /* RT_USING_SMP */

    rt_hw_asid_switch_pgtbl(aspace, page_table);
}

#else /* !ARCH_USING_ASID */
void rt_hw_aspace_switch(rt_aspace_t aspace)
{
    /* Each hart uses its own root table page: page number <hartid> of the table area. */
    uint32_t hartid = rt_cpu_get_id();
    uintptr_t root_va = (uintptr_t)aspace->page_table;
    uintptr_t root_pa;
    rt_ubase_t satp_val;

    root_va += (uintptr_t)(hartid * ARCH_PAGE_SIZE);
    root_pa = (uintptr_t)rt_kmem_v2p((void *)root_va);

    /* Remember what is being installed so rt_hw_mmu_tbl_get() can report it. */
#ifdef RT_USING_SMP
    current_mmu_table[rt_hw_cpu_id()] = (void *)root_va;
#else
    current_mmu_table = aspace->page_table;
#endif

    /* satp = translation mode field | physical page number of the root table */
    satp_val = ((size_t)SATP_MODE) << SATP_MODE_OFFSET;
    satp_val |= (rt_ubase_t)root_pa >> PAGE_OFFSET_BIT;
    write_csr(satp, satp_val);

    /* Drop translations cached on this hart for the previous table. */
    rt_hw_tlb_invalidate_all_local();
}

/*
 * ASID initialisation hook called from rt_hw_mmu_setup(). This definition is
 * the one compiled when ARCH_USING_ASID is not defined, so it does nothing.
 */
void rt_hw_asid_init(void)
{
}
#endif /* ARCH_USING_ASID */

/**
 * @brief Get the page table recorded by the last rt_hw_aspace_switch().
 *
 * @return The recorded page table pointer. On SMP builds this is the entry
 *         belonging to the calling CPU. RT_NULL is returned if no switch has
 *         been recorded yet.
 */
void *rt_hw_mmu_tbl_get(void)
{
#ifdef RT_USING_SMP
    return current_mmu_table[rt_hw_cpu_id()];
#else
    return current_mmu_table;
#endif /* RT_USING_SMP */
}

/**
 * @brief Map a single virtual page to a physical page in the page table.
 *
 * The mapping is written once per hart: for every hartid below RT_CPUS_NR the
 * root table located at `aspace->page_table + hartid * ARCH_PAGE_SIZE` is
 * walked, missing intermediate tables are allocated with rt_pages_alloc(),
 * and the leaf entry is installed.
 *
 * Page references are taken with rt_page_ref_inc() on a table page each time
 * a new entry is stored in a second or third level table; _unmap_pte() drops
 * them again.
 *
 * @param aspace Address space whose page table(s) are modified.
 * @param va     Virtual address of the page.
 * @param pa     Physical address of the page.
 * @param attr   Attribute bits combined into the leaf entry.
 *
 * @return 0 on success, -1 when a table page could not be allocated.
 *
 * @note The caller holds the page table lock (see rt_hw_mmu_map()).
 * @note When an allocation fails for a later hart, entries already written
 *       for earlier harts are left in place.
 * @note NOTE(review): this expects RT_CPUS_NR consecutive root table pages
 *       behind aspace->page_table - confirm that every aspace passed in
 *       provides them.
 */
static int _map_one_page(struct rt_aspace *aspace, void *va, void *pa,
                         size_t attr)
{
    rt_ubase_t l1_off, l2_off, l3_off;
    rt_ubase_t *mmu_l1, *mmu_l2, *mmu_l3;

    l1_off = GET_L1((size_t)va);
    l2_off = GET_L2((size_t)va);
    l3_off = GET_L3((size_t)va);
    /* Create a separate page table for each hart to facilitate access to the .percpu section. */
    for (int hartid = 0; hartid < RT_CPUS_NR; hartid++)
    {
        mmu_l1 = (rt_ubase_t *)((rt_ubase_t)aspace->page_table + (rt_ubase_t)(hartid * ARCH_PAGE_SIZE)) + l1_off;

        if (PTE_USED(*mmu_l1))
        {
            mmu_l2 = (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*mmu_l1), PV_OFFSET);
        }
        else
        {
            mmu_l2 = (rt_ubase_t *)rt_pages_alloc(0);

            if (mmu_l2)
            {
                /* publish the zeroed table before linking it into the parent */
                rt_memset(mmu_l2, 0, PAGE_SIZE);
                rt_hw_cpu_dcache_clean(mmu_l2, PAGE_SIZE);
                *mmu_l1 = COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l2, PV_OFFSET),
                                     PAGE_DEFAULT_ATTR_NEXT);
                rt_hw_cpu_dcache_clean(mmu_l1, sizeof(*mmu_l1));
            }
            else
            {
                return -1;
            }
        }

        if (PTE_USED(*(mmu_l2 + l2_off)))
        {
            /* a leaf at this level would be a huge page; it cannot be split here */
            RT_ASSERT(!PAGE_IS_LEAF(*(mmu_l2 + l2_off)));
            mmu_l3 =
                (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*(mmu_l2 + l2_off)), PV_OFFSET);
        }
        else
        {
            mmu_l3 = (rt_ubase_t *)rt_pages_alloc(0);

            if (mmu_l3)
            {
                rt_memset(mmu_l3, 0, PAGE_SIZE);
                rt_hw_cpu_dcache_clean(mmu_l3, PAGE_SIZE);
                *(mmu_l2 + l2_off) =
                    COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l3, PV_OFFSET),
                               PAGE_DEFAULT_ATTR_NEXT);
                /* clean the entry that was just written, not entry 0 of the table */
                rt_hw_cpu_dcache_clean(mmu_l2 + l2_off,
                                       sizeof(*(mmu_l2 + l2_off)));
                /* declares a reference to parent page table */
                rt_page_ref_inc((void *)mmu_l2, 0);
            }
            else
            {
                return -1;
            }
        }

        /* existing mappings are never overridden */
        RT_ASSERT(!PTE_USED(*(mmu_l3 + l3_off)));
        /* declares a reference to parent page table */
        rt_page_ref_inc((void *)mmu_l3, 0);
        *(mmu_l3 + l3_off) = COMBINEPTE((rt_ubase_t)pa, attr);
        rt_hw_cpu_dcache_clean(mmu_l3 + l3_off, sizeof(*(mmu_l3 + l3_off)));
    }

    return 0;
}

#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
/*
 * Map one page of the .percpu area into the given root table.
 *
 * The two upper table levels are walked and created on demand from the
 * early page pool (get_free_page()); the leaf entry is then written with the
 * MMU_MAP_K_RWCB attribute. Table addresses are taken straight from the
 * entries' page numbers, without a physical-to-virtual conversion.
 *
 * Returns ARCH_PAGE_SIZE on success or MMU_MAP_ERROR_NOPAGE when the pool is
 * exhausted. cpu_id is accepted but not used.
 */
static int _map_percpu_area(rt_ubase_t *table, void *va, void *pa, int cpu_id)
{
    rt_ubase_t shift = PPN2_SHIFT;
    rt_ubase_t idx;
    int depth = 0;

    while (depth < 2)
    {
        idx = ((rt_ubase_t)va >> shift) & VPN_MASK;

        if (!(table[idx] & PTE_V))
        {
            /* no next-level table yet: take a fresh zeroed page and link it */
            unsigned long new_tbl = get_free_page();

            if (!new_tbl)
            {
                return MMU_MAP_ERROR_NOPAGE;
            }
            rt_memset((void *)new_tbl, 0, ARCH_PAGE_SIZE);
            table[idx] = ((new_tbl >> ARCH_PAGE_SHIFT) << PTE_BITS) | PTE_V;

            rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, table + idx, sizeof(void *));
        }

        /* step into the next level page table */
        table = (unsigned long *)((table[idx] >> PTE_BITS) << ARCH_PAGE_SHIFT);
        shift -= VPN_BITS;
        depth++;
    }

    /* last level: install the page entry itself */
    idx = ((rt_ubase_t)va >> shift) & VPN_MASK;
    table[idx] = (((rt_ubase_t)pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | MMU_MAP_K_RWCB;

    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, table + idx, sizeof(void *));

    return ARCH_PAGE_SIZE;
}

/**
 * @brief Map the .percpu section into the kernel root table of every hart.
 *
 * The work is done on the first call only; later calls return immediately.
 * For hart N the virtual range [__percpu_start, __percpu_end) is mapped to
 * the N-th copy of the section in physical memory, i.e. to
 * `__percpu_start + rt_kmem_pvoff() + N * section_size`, using the root table
 * `rt_kernel_space.page_table + N * ARCH_PAGE_SIZE`.
 *
 * @note The "already done" flag is a plain static variable; no locking
 *       protects the first call.
 */
static void rt_hw_percpu_mmu_init_check(void)
{
    static rt_bool_t inited = RT_FALSE;
    void *page_table;

    if (inited)
    {
        return;
    }
    inited = RT_TRUE;

    page_table = rt_kernel_space.page_table;

    for (int hartid = 0; hartid < RT_CPUS_NR; ++hartid)
    {
        void *vaddr = &__percpu_start;
        void *paddr = vaddr + rt_kmem_pvoff();
        size_t size = (size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);

        /* Offset to per-CPU partition for current CPU */
        paddr += size * hartid;

        while (size > 0)
        {
            /* signed on purpose: _map_percpu_area() reports errors as an int code */
            int mapped;

            MM_PGTBL_LOCK(&rt_kernel_space);
            mapped = _map_percpu_area(page_table, vaddr, paddr, hartid);
            MM_PGTBL_UNLOCK(&rt_kernel_space);

            RT_ASSERT(mapped > 0);
            if (mapped <= 0)
            {
                /* assertions disabled: stop instead of advancing by an error code */
                return;
            }

            vaddr += mapped;
            paddr += mapped;
            /* the last page may cover more than what is left; never wrap below zero */
            size -= ((size_t)mapped < size) ? (size_t)mapped : size;
        }

        page_table += ARCH_PAGE_SIZE;
    }
}
#endif /* RT_USING_SMP && ARCH_MM_MMU */

/**
 * @brief Maps a virtual address space to a physical address space.
 *
 * This function maps a specified range of virtual addresses to a range of physical addresses
 * and sets the attributes of the page table entries (PTEs). If an error occurs during the
 * mapping process, the function will automatically roll back any partially completed mappings.
 *
 * @param aspace Pointer to the address space structure containing the page table information.
 * @param v_addr The starting virtual address to be mapped.
 * @param p_addr The starting physical address to be mapped.
 * @param size The size of the memory to be mapped (in bytes).
 * @param attr The attributes of the page table entries (e.g., read/write permissions, cache policies).
 *
 * @return On success, returns the starting virtual address `v_addr`;
 *         On failure, returns `NULL`.
 *
 * @note This function will not override existing page table entries.
 * @warning The caller must ensure that `v_addr` and `p_addr` are page-aligned,
 *          and `size` is a multiple of the page size.
 *
 */
void *rt_hw_mmu_map(struct rt_aspace *aspace, void *v_addr, void *p_addr,
                    size_t size, size_t attr)
{
    int ret = -1;
    void *unmap_va = v_addr; /* start of the range; doubles as rollback cursor */
    size_t npages = size >> ARCH_PAGE_SHIFT;
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
    /* Map the memory of the .percpu section separately for each core. */
    rt_hw_percpu_mmu_init_check();
#endif

    /* TODO trying with HUGEPAGE here */
    while (npages--)
    {
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
        /* skip mapping .percpu section pages */
        if (v_addr < (void *)&__percpu_start ||
            v_addr >= (void *)&__percpu_end)
#endif
        {
            MM_PGTBL_LOCK(aspace);
            ret = _map_one_page(aspace, v_addr, p_addr, attr);
            MM_PGTBL_UNLOCK(aspace);
            if (ret != 0)
            {
                /* error, undo every page mapped by this call so far */
                while (unmap_va != v_addr)
                {
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
                    /*
                     * .percpu pages were skipped above and belong to the
                     * mapping set up by rt_hw_percpu_mmu_init_check(); the
                     * rollback must leave them alone.
                     */
                    if (unmap_va < (void *)&__percpu_start ||
                        unmap_va >= (void *)&__percpu_end)
#endif
                    {
                        MM_PGTBL_LOCK(aspace);
                        _unmap_area(aspace, unmap_va);
                        MM_PGTBL_UNLOCK(aspace);
                    }
                    unmap_va += ARCH_PAGE_SIZE;
                }
                break;
            }
        }
        v_addr += ARCH_PAGE_SIZE;
        p_addr += ARCH_PAGE_SIZE;
    }

    if (ret == 0)
    {
        /* no rollback happened, so unmap_va still holds the original v_addr */
        return unmap_va;
    }

    return NULL;
}

#ifdef ARCH_MM_MMU
/* Install the base address of the page pool that get_free_page() hands pages out of. */
void set_free_page(void *page_array)
{
    __init_page_array = page_array;
}

/*
 * Early-stage page allocator.
 *
 * Hands out the next unused page of the pool installed with set_free_page()
 * and returns its address, or 0 once the index limit has been reached. The
 * index is advanced with rt_hw_atomic_add() on every call, including calls
 * that fail.
 *
 * NOTE(review): the limit is the number of unsigned long entries in one page,
 * not a pool size passed in by the caller - confirm that the pool really
 * holds that many pages.
 */
unsigned long get_free_page(void)
{
    static rt_atomic_t page_off = 0;

    /* presumably the value before the increment, i.e. the slot claimed by this call */
    rt_atomic_t slot = rt_hw_atomic_add(&page_off, 1);

    if (!(slot < ARCH_PAGE_SIZE / sizeof(unsigned long)))
    {
        return 0;
    }

    return (unsigned long)(__init_page_array[slot].page);
}

#ifdef RT_USING_SMP
/**
 * @brief Perform the early mapping of one .percpu block.
 *
 * Follows the top-level entry selected by `va` to the second-level table and
 * stores a leaf entry for `pa` there with the MMU_MAP_K_RWCB attribute, then
 * issues `sfence.vma`. The second-level table must already exist; nothing is
 * allocated here.
 *
 * @param tbl Root page table to modify.
 * @param va  Virtual address to map.
 * @param pa  Physical address to map it to.
 *
 * @return 0 on success, MMU_MAP_ERROR_CONFLICT when the top-level entry is
 *         not a valid pointer to a next-level table.
 */
static int rt_hw_mmu_map_percpu_early(rt_ubase_t *tbl, rt_ubase_t va, rt_ubase_t pa)
{
    rt_ubase_t off, level_shift;

    level_shift = PPN2_SHIFT;

    off = (va >> level_shift) & VPN_MASK;

    /*
     * Only descend through a valid, non-leaf entry. Anything else would make
     * the store below land at an address that is not a page table.
     */
    if (!(tbl[off] & PTE_V) || (tbl[off] & PTE_ATTR_RWX) != 0)
    {
        return MMU_MAP_ERROR_CONFLICT;
    }

    /* Step into the next level page table */
    tbl = (rt_ubase_t *)((tbl[off] >> PTE_BITS) << ARCH_PAGE_SHIFT);
    level_shift -= VPN_BITS;

    /* second-level leaf entry */
    off = (va >> level_shift) & VPN_MASK;
    tbl[off] = ((pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | MMU_MAP_K_RWCB;

    asm volatile("sfence.vma x0, x0");
    return 0;
}
#endif /* RT_USING_SMP */

/*
 * Early mapping of one second-level block (L2_PAGE_SIZE bytes).
 *
 * Both va and pa must be aligned to L2_PAGE_SIZE. The second-level table is
 * taken from the early page pool when the top-level entry is not valid yet.
 *
 * Returns 0 on success, MMU_MAP_ERROR_VANOTALIGN for a misaligned address,
 * MMU_MAP_ERROR_NOPAGE when the pool is exhausted and MMU_MAP_ERROR_CONFLICT
 * when the top-level entry is already a leaf mapping.
 */
static int rt_hw_mmu_map_early(rt_ubase_t *tbl, rt_ubase_t va, rt_ubase_t pa,
                               rt_ubase_t attr)
{
    unsigned long *top = tbl;
    unsigned long *second;
    unsigned long next;
    rt_ubase_t idx;
    rt_ubase_t shift = PPN2_SHIFT;

    if ((va & (L2_PAGE_SIZE - 1)) || (pa & (L2_PAGE_SIZE - 1)))
    {
        return MMU_MAP_ERROR_VANOTALIGN;
    }

    /* top level: make sure a next-level table is present */
    idx = (va >> shift) & VPN_MASK;
    if (!(top[idx] & PTE_V))
    {
        next = get_free_page();
        if (!next)
        {
            return MMU_MAP_ERROR_NOPAGE;
        }

        rt_memset((void *)next, 0, ARCH_PAGE_SIZE);
        top[idx] = ((next >> ARCH_PAGE_SHIFT) << PTE_PPN_SHIFT) | PTE_V;
    }

    /* an entry carrying R/W/X bits is a leaf, not a pointer to a table */
    if ((top[idx] & PTE_ATTR_RWX) != 0)
    {
        return MMU_MAP_ERROR_CONFLICT;
    }

    /* step into the next level page table */
    next = (top[idx] >> PTE_BITS) << ARCH_PAGE_SHIFT;
    second = (unsigned long *)next;
    shift -= VPN_BITS;

    /* second level: write the block entry */
    idx = (va >> shift) & VPN_MASK;
    second[idx] = ((pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | attr;

    return 0;
}
#endif

/*
 * Clear a page table entry and release table pages that become unused.
 *
 * pentry    entry to clear
 * lvl_entry entries visited on the way down, indexed by level (0 = root)
 * level     level of pentry
 *
 * After clearing an entry below the root, one reference is dropped from the
 * table page holding it. If the count read back afterwards is 1, the page is
 * freed once more and the entry pointing to it in the parent level is cleared
 * in turn. The root level is never released here.
 */
static void _unmap_pte(rt_ubase_t *pentry, rt_ubase_t *lvl_entry[], int level)
{
    for (;;)
    {
        *pentry = 0;
        rt_hw_cpu_dcache_clean(pentry, sizeof(*pentry));

        /* we don't handle level 0, which is maintained by caller */
        if (level <= 0)
        {
            break;
        }

        /* table page that contains the entry just cleared */
        void *tbl_page = (void *)((rt_ubase_t)pentry & ~ARCH_PAGE_MASK);

        /* decrease reference from child page to parent */
        rt_pages_free(tbl_page, 0);

        if (rt_page_ref_get(tbl_page, 0) != 1)
        {
            /* other entries still reference this table */
            break;
        }

        /* last user gone: release the table and continue with its parent entry */
        rt_pages_free(tbl_page, 0);
        pentry = lvl_entry[--level];
    }
}

/* Unmaps a virtual address range (1GB/2MB/4KB according to actual page level) from the page table. */
static size_t _unmap_area(struct rt_aspace *aspace, void *v_addr)
{
    rt_ubase_t loop_va = __UMASKVALUE((rt_ubase_t)v_addr, PAGE_OFFSET_MASK);
    size_t unmapped = 0;

    int i = 0;
    rt_ubase_t lvl_off[3];
    rt_ubase_t *lvl_entry[3];
    lvl_off[0] = (rt_ubase_t)GET_L1(loop_va);
    lvl_off[1] = (rt_ubase_t)GET_L2(loop_va);
    lvl_off[2] = (rt_ubase_t)GET_L3(loop_va);
    unmapped = 1 << (ARCH_PAGE_SHIFT + ARCH_INDEX_WIDTH * 2ul);

    rt_ubase_t *pentry;
    lvl_entry[i] = ((rt_ubase_t *)aspace->page_table + lvl_off[i]);
    pentry = lvl_entry[i];

    /* check if lvl_entry[0] is valid. if no, return 0 directly. */
    if (!PTE_USED(*pentry))
    {
        return 0;
    }

    /* find leaf page table entry */
    while (PTE_USED(*pentry) && !PAGE_IS_LEAF(*pentry))
    {
        i += 1;

        if (i >= 3)
        {
            unmapped = 0;
            break;
        }

        lvl_entry[i] = ((rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*pentry), PV_OFFSET) +
                        lvl_off[i]);
        pentry = lvl_entry[i];
        unmapped >>= ARCH_INDEX_WIDTH;
    }

    /* clear PTE & setup its */
    if (PTE_USED(*pentry))
    {
        _unmap_pte(pentry, lvl_entry, i);
    }
    else
    {
        unmapped = 0; /* invalid pte, return 0. */
    }

    return unmapped;
}

/**
 * @brief Unmaps a range of virtual memory addresses from the specified address space.
 *
 * This function is responsible for unmapping a contiguous region of virtual memory
 * from the given address space. It handles multiple pages and ensures thread safety
 * by locking the page table during the unmapping operation.
 *
 * @param aspace Pointer to the address space structure from which the memory will be unmapped.
 * @param v_addr Starting virtual address to unmap. Must be page-aligned.
 * @param size Size of the memory region to unmap. Must be page-aligned.
 *
 * @note The caller must ensure that both `v_addr` and `size` are page-aligned.
 *
 * @details The function operates in a loop, unmapping memory in chunks. It uses the
 * `_unmap_area` function to perform the actual unmapping, which is called within a
 * locked section to ensure thread safety. The loop continues until the entire region
 * is unmapped.
 *
 * @see _unmap_area
 * @note unmap is different from map that it can handle multiple pages
 */
void rt_hw_mmu_unmap(struct rt_aspace *aspace, void *v_addr, size_t size)
{
    /* caller guarantee that v_addr & size are page aligned */
    void *cursor = v_addr;
    size_t remaining = size;

    if (!aspace->page_table)
    {
        return;
    }

    while (remaining > 0)
    {
        size_t chunk;

        MM_PGTBL_LOCK(aspace);
        chunk = _unmap_area(aspace, cursor);
        MM_PGTBL_UNLOCK(aspace);

        /*
         * chunk == 0: nothing is mapped at cursor; chunk > remaining: the
         * removed entry reached past the requested range. Stop in both cases.
         */
        if (chunk == 0 || chunk > remaining)
        {
            break;
        }

        remaining -= chunk;
        cursor += chunk;
    }
}

#ifdef RT_USING_SMART
/*
 * Record the region given to rt_hw_mmu_map_init(): vaddr/size become the
 * ioremap start and size, and rt_mpr_start is placed rt_mpr_size bytes below
 * the ioremap start.
 */
static inline void _init_region(void *vaddr, size_t size)
{
    rt_ioremap_start = vaddr;
    rt_ioremap_size = size;
    rt_mpr_start = rt_ioremap_start - rt_mpr_size;
    LOG_D("rt_ioremap_start: %p, rt_mpr_start: %p", rt_ioremap_start,
          rt_mpr_start);
}
#else
/*
 * Without RT_USING_SMART only rt_mpr_start is set, to rt_mpr_size bytes below
 * vaddr; size is not used.
 */
static inline void _init_region(void *vaddr, size_t size)
{
    rt_mpr_start = vaddr - rt_mpr_size;
}
#endif

/*
 * Start address and size passed to rt_aspace_init() for rt_kernel_space in
 * rt_hw_mmu_map_init().
 *
 * With RT_USING_SMART and ARCH_REMAP_KERNEL the space runs from
 * KERNEL_VADDR_START up to and including the page at 0xfffffffffffff000.
 * Otherwise it starts at 0x1000 and ends at USER_VADDR_START; when
 * USER_VADDR_START is 0 (builds without RT_USING_SMART) the unsigned
 * subtraction wraps around.
 */
#if defined(RT_USING_SMART) && defined(ARCH_REMAP_KERNEL)
#define KERN_SPACE_START ((void *)KERNEL_VADDR_START)
#define KERN_SPACE_SIZE  (0xfffffffffffff000UL - KERNEL_VADDR_START + 0x1000)
#else
#define KERN_SPACE_START ((void *)0x1000)
#define KERN_SPACE_SIZE  ((size_t)USER_VADDR_START - 0x1000)
#endif

/**
 * @brief Initialize the MMU (Memory Management Unit) mapping.
 *
 * This function initializes the MMU mapping, including the following steps:
 * 1. Check the validity of the input parameters,
 * 2. Calculate the start and end virtual addresses based on the input virtual address and size.
 * 3. Convert the virtual addresses to PPN2 indices.
 * 4. Check the initialization of the page table. If any entry in the page table within
 *    the specified range is non-zero, it returns -1.
 * 5. It initializes the kernel address space using rt_aspace_init() and initializes the specified region
 *    using _init_region.
 *
 * @param aspace Pointer to the address space. Must not be NULL.
 * @param v_address The starting virtual address.
 * @param size The size of the virtual address space.
 * @param vtable Pointer to the page table. Must not be NULL.
 * @param pv_off The page table offset.
 *
 * @return Returns 0 if the initialization is successful. Returns -1 if any input parameter is invalid
 *         or the page table initialization check fails.
 */
int rt_hw_mmu_map_init(rt_aspace_t aspace, void *v_address, rt_ubase_t size,
                       rt_ubase_t *vtable, rt_ubase_t pv_off)
{
    size_t first, last, idx;

    if (!aspace || !vtable)
    {
        return -1;
    }

    /* inclusive byte range; reject ranges that wrap around the address space */
    first = (rt_ubase_t)v_address;
    last = ((rt_ubase_t)v_address) + size - 1;
    if (last < first)
    {
        return -1;
    }

    /* convert address to PPN2 index */
    first = GET_L1(first);
    last = GET_L1(last);

    /* top-level slot 0 is not accepted */
    if (first == 0)
    {
        return -1;
    }

    /* every top-level entry of the range must still be empty */
    for (idx = first; idx <= last; idx++)
    {
        if (vtable[idx])
        {
            return -1;
        }
    }

    /* note: the kernel address space object is initialised, not `aspace` */
    rt_aspace_init(&rt_kernel_space, KERN_SPACE_START, KERN_SPACE_SIZE, vtable);

    _init_region(v_address, size);
    return 0;
}

/* Number of page table levels: virtual address bits above the page offset, divided by the index width. */
static const int max_level =
    (ARCH_VADDR_WIDTH - ARCH_PAGE_SHIFT) / ARCH_INDEX_WIDTH;

/*
 * Size in bytes of the region translated by a single entry at `level`.
 * The size shrinks by ARCH_INDEX_WIDTH bits per level; level == max_level
 * yields one page (1 << ARCH_PAGE_SHIFT). Levels are numbered as reported by
 * _query(), i.e. 1 is the top level.
 */
static inline uintptr_t _get_level_size(int level)
{
    return 1ul << (ARCH_PAGE_SHIFT + (max_level - level) * ARCH_INDEX_WIDTH);
}

/*
 * Look up the page table entry that maps vaddr.
 *
 * The walk uses the root table of the calling CPU
 * (aspace->page_table + rt_hw_cpu_id() * ARCH_PAGE_SIZE). On success the
 * entry is returned and *level is set to the level it was found on (1, 2 or
 * 3). RT_NULL is returned when aspace is NULL or no valid entry exists; in
 * that case *level is left untouched.
 */
static rt_ubase_t *_query(struct rt_aspace *aspace, void *vaddr, int *level)
{
    rt_ubase_t *table;
    rt_ubase_t *entry;

    if (!aspace)
    {
        LOG_W("%s: no aspace", __func__);
        return RT_NULL;
    }

    /* level 1 */
    table = (rt_ubase_t *)((rt_ubase_t)aspace->page_table + (rt_ubase_t)(rt_hw_cpu_id() * ARCH_PAGE_SIZE));
    entry = table + GET_L1((rt_uintptr_t)vaddr);
    if (!PTE_USED(*entry))
    {
        return RT_NULL;
    }
    if (*entry & PTE_XWR_MASK)
    {
        /* any of X/W/R set: this entry is a leaf */
        *level = 1;
        return entry;
    }

    /* level 2 */
    table = (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*entry), PV_OFFSET);
    entry = table + GET_L2((rt_uintptr_t)vaddr);
    if (!PTE_USED(*entry))
    {
        return RT_NULL;
    }
    if (*entry & PTE_XWR_MASK)
    {
        *level = 2;
        return entry;
    }

    /* level 3 */
    table = (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*entry), PV_OFFSET);
    entry = table + GET_L3((rt_uintptr_t)vaddr);
    if (!PTE_USED(*entry))
    {
        return RT_NULL;
    }

    *level = 3;
    return entry;
}

/**
 * @brief Translate a virtual address to a physical address.
 *
 * This function translates a given virtual address (`vaddr`) to its corresponding
 * physical address (`paddr`) using the page table in the specified address space (`aspace`).
 *
 * @param aspace Pointer to the address space structure containing the page table.
 * @param vaddr The virtual address to be translated.
 *
 * @return The translated physical address. If the translation fails, `ARCH_MAP_FAILED` is returned.
 *
 * @note The function queries the page table entry (PTE) for the virtual address using `_query`.
 *       If a valid PTE is found, the physical address is extracted and combined with the offset
 *       from the virtual address. If no valid PTE is found, a debug log is recorded, and
 *       `ARCH_MAP_FAILED` is returned.
 */
void *rt_hw_mmu_v2p(struct rt_aspace *aspace, void *vaddr)
{
    int level;
    rt_ubase_t *pte = _query(aspace, vaddr, &level);

    if (!pte)
    {
        LOG_D("%s: failed at %p", __func__, vaddr);
        return (void *)(uintptr_t)ARCH_MAP_FAILED;
    }

    /* bits of vaddr below the mapping size are the offset inside the mapped region */
    uintptr_t offset_mask = _get_level_size(level) - 1;
    uintptr_t paddr = GET_PADDR(*pte);

    paddr |= ((intptr_t)vaddr & offset_mask);

    return (void *)paddr;
}

static int _noncache(rt_ubase_t *pte)
{
    return 0;
}

static int _cache(rt_ubase_t *pte)
{
    return 0;
}

static int (*control_handler[MMU_CNTL_DUMMY_END])(rt_ubase_t *pte)=
{
    [MMU_CNTL_CACHE] = _cache,
    [MMU_CNTL_NONCACHE] = _noncache,
};

/**
 * @brief Control the page table entries (PTEs) for a specified virtual address range.
 *
 * This function applies a control command (e.g., cache control) to the page table entries
 * (PTEs) corresponding to the specified virtual address range (`vaddr` to `vaddr + size`).
 *
 * @param aspace Pointer to the address space structure containing the page table.
 * @param vaddr The starting virtual address of the range.
 * @param size The size of the virtual address range.
 * @param cmd The control command to apply (e.g., `MMU_CNTL_CACHE`, `MMU_CNTL_NONCACHE`.etc.).
 *
 * @return `RT_EOK` on success, or an error code (`-RT_EINVAL` or `-RT_ENOSYS`) on failure.
 *
 * @note The function uses the `control_handler` array to map the command to a handler function.
 *       It iterates over the virtual address range, queries the PTEs, and applies the handler
 *       to each valid PTE. If the command is invalid, `-RT_ENOSYS` is returned.
 */
int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size,
                      enum rt_mmu_cntl cmd)
{
    int level;
    int err = -RT_EINVAL;
    void *vend = vaddr + size;

    int (*handler)(rt_ubase_t *pte);
    if (cmd >= 0 && cmd < MMU_CNTL_DUMMY_END)
    {
        handler = control_handler[cmd];

        while (vaddr < vend)
        {
            rt_ubase_t *pte = _query(aspace, vaddr, &level);
            void *range_end = vaddr + _get_level_size(level);
            RT_ASSERT(range_end <= vend);

            if (pte)
            {
                err = handler(pte);
                RT_ASSERT(err == RT_EOK);
            }
            vaddr = range_end;
        }
    }
    else
    {
        err = -RT_ENOSYS;
    }

    return err;
}

/**
 * @brief setup Page Table for kernel space. It's a fixed map
 * and all mappings cannot be changed after initialization.
 *
 * Memory region in struct mem_desc must be page aligned,
 * otherwise is a failure and no report will be
 * returned.
 *
 * @param aspace Pointer to the address space structure.
 * @param mdesc Pointer to the array of memory descriptors.
 * @param desc_nr Number of memory descriptors in the array.
 */
void rt_hw_mmu_setup(rt_aspace_t aspace, struct mem_desc *mdesc, int desc_nr)
{
    void *err; /* receives the output argument of rt_aspace_map_phy_static(); not inspected */

    /*
     * The index is signed like desc_nr: an unsigned counter would turn a
     * negative count into a huge loop bound.
     */
    for (int i = 0; i < desc_nr; i++, mdesc++)
    {
        size_t attr;

        switch (mdesc->attr)
        {
            case NORMAL_MEM:
            case NORMAL_NOCACHE_MEM:
                /* NOTE(review): non-cached memory gets the same attribute as normal memory - confirm */
                attr = MMU_MAP_K_RWCB;
                break;
            case DEVICE_MEM:
            default:
                /* unknown types are treated like device memory */
                attr = MMU_MAP_K_DEVICE;
                break;
        }

        struct rt_mm_va_hint hint = {
            .flags = MMF_MAP_FIXED,
            .limit_start = aspace->start,
            .limit_range_size = aspace->size,
            .map_size = mdesc->vaddr_end - mdesc->vaddr_start + 1,
            .prefer = (void *)mdesc->vaddr_start};

        /* no physical address given: derive it from the virtual one */
        if (mdesc->paddr_start == (rt_uintptr_t)ARCH_MAP_FAILED)
        {
            mdesc->paddr_start = mdesc->vaddr_start + PV_OFFSET;
        }

        rt_aspace_map_phy_static(aspace, &mdesc->varea, &hint, attr,
                                 mdesc->paddr_start >> MM_PAGE_SHIFT, &err);
    }

    rt_hw_asid_init();

    /* note: the kernel address space is activated, regardless of `aspace` */
    rt_hw_aspace_switch(&rt_kernel_space);
    rt_page_cleanup();
}

/* satp value with only the mode field set; rt_hw_mem_setup_early() ORs the root table's page number into it. */
#define SATP_BASE ((rt_ubase_t)SATP_MODE << SATP_MODE_OFFSET)

/**
 * @brief Early memory setup function for hardware initialization.
 *
 * This function performs early memory setup tasks, including:
 * - Calculating the physical-to-virtual (PV) offset.
 * - Setting up initial page tables for identity mapping and text region relocation.
 * - Applying new memory mappings by updating the SATP register.
 *
 * @note This function is typically called during the early stages of system initialization (startup_gcc.S),
 *       before the memory management system is fully operational.
 *       Here the identity mapping is implemented by a 1-stage page table, whose page size is 1GB.
 */
void rt_hw_mem_setup_early(void *pgtbl, rt_uint64_t hartid)
{
    rt_ubase_t pv_off, size;
    rt_ubase_t ps = 0x0;
    rt_ubase_t vs = 0x0;
    /* each hart works on its own root table: page number <hartid> inside pgtbl */
    rt_ubase_t *early_pgtbl = (rt_ubase_t *)(pgtbl + hartid * ARCH_PAGE_SIZE);

    /* calculate pv_offset */
    void *symb_pc;
    void *symb_linker;
    /* address of _start as seen by the code that is running right now */
    __asm__ volatile("la %0, _start\n" : "=r"(symb_pc));
    /* _start_link_addr is read as a pointer-sized slot; presumably it stores the link-time address of _start */
    __asm__ volatile("la %0, _start_link_addr\n" : "=r"(symb_linker));
    symb_linker = *(void **)symb_linker;
    pv_off = symb_pc - symb_linker;
    rt_kmem_pvoff_set(pv_off);

    /* with a zero offset nothing below runs: no table is built and satp is not written */
    if (pv_off)
    {
        /* the offset must be a multiple of the span of one top-level entry */
        if (pv_off & ((1ul << (ARCH_INDEX_WIDTH * 2 + ARCH_PAGE_SHIFT)) - 1))
        {
            LOG_E("%s: not aligned virtual address. pv_offset %p", __func__,
                  pv_off);
            RT_ASSERT(0);
        }

        /**
         * identical mapping,
         * PC are still at lower region before relocating to high memory
         */
        rt_ubase_t pg_idx ;
        /* Round down symb_pc to L1_PAGE_SIZE boundary to ensure proper page alignment.
         * This is necessary because MMU operations work with page-aligned addresses, and
         * make sure all the text region is mapped.*/
        ps = (rt_ubase_t)symb_pc & (~(L1_PAGE_SIZE - 1));
        pg_idx = GET_L1(ps);
        /* a single top-level leaf entry: virtual == physical for this region */
        early_pgtbl[pg_idx] = COMBINEPTE(ps, MMU_MAP_EARLY);

        /* relocate text region */
        __asm__ volatile("la %0, _start\n" : "=r"(ps));
        ps &= ~(L1_PAGE_SIZE - 1);
        vs = ps - pv_off;

        /* relocate region: a window of 0x80000000 bytes starting at vs */
        rt_ubase_t ve = vs + 0x80000000;
#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
        /* SMP: map the window in L2_PAGE_SIZE steps; the result of rt_hw_mmu_map_early() is not checked */
        while (vs < ve)
        {
            rt_hw_mmu_map_early(early_pgtbl, vs, ps, MMU_MAP_EARLY);
            vs += L2_PAGE_SIZE;
            ps += L2_PAGE_SIZE;
        }
#else
        /* uniprocessor: one top-level leaf entry per L1_PAGE_SIZE step */
        for (int i = GET_L1(vs); i < GET_L1(ve); i++)
        {
            early_pgtbl[i] = COMBINEPTE(ps, MMU_MAP_EARLY);
            ps += L1_PAGE_SIZE;
        }
#endif

#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
        /* map .percpu section: same virtual range on every hart, hart-specific physical copy */
        ps = (rt_ubase_t)&__percpu_start;
        vs = ps - rt_kmem_pvoff();
        size = (rt_size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);
        /* Offset to per-CPU partition for current CPU */
        ps += hartid * size;
        ve = vs + size;

        while (vs < ve)
        {
            /* Map physical address per-CPU partition */
            rt_hw_mmu_map_percpu_early(early_pgtbl, vs, ps);
            ps += L2_PAGE_SIZE;
            vs += L2_PAGE_SIZE;
        }
#endif
        /* apply new mapping: fence, install the root table in satp, fence again */
        asm volatile("sfence.vma x0, x0");
        write_csr(satp, SATP_BASE | ((size_t)early_pgtbl >> PAGE_OFFSET_BIT));
        asm volatile("sfence.vma x0, x0");
    }
    /* return to lower text section */
}

/**
 * @brief Creates and initializes a new MMU page table.
 *
 * This function allocates a new MMU page table, copies the kernel space
 * page table into it, and flushes the data cache to ensure consistency.
 *
 * @return
 * - A pointer to the newly allocated MMU page table on success.
 * - RT_NULL if the allocation fails.
 */
void *rt_hw_mmu_pgtbl_create(void)
{
    /* a single page is requested (order 0) */
    rt_ubase_t *new_root = (rt_ubase_t *)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE);

    if (new_root != RT_NULL)
    {
        /* start from a copy of the first page of the kernel page table */
        rt_memcpy(new_root, rt_kernel_space.page_table, ARCH_PAGE_SIZE);
        rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, new_root, ARCH_PAGE_SIZE);
    }

    /* RT_NULL when the allocation failed */
    return new_root;
}

/**
 * @brief Deletes an MMU page table.
 *
 * This function frees the memory allocated for the given MMU page table.
 *
 * @param pgtbl Pointer to the MMU page table to be deleted.
 */
void rt_hw_mmu_pgtbl_delete(void *pgtbl)
{
    /* order 0 matches the single page allocated by rt_hw_mmu_pgtbl_create() */
    rt_pages_free(pgtbl, 0);
}