[libcpu-riscv]: [support SMP]: Add SMP support for qemu-virt64-riscv

1. Add the function declarations required for SMP support and implement them:
rt_hw_secondary_cpu_up, secondary_cpu_entry, rt_hw_local_irq_disable,
rt_hw_local_irq_enable, rt_hw_secondary_cpu_idle_exec, rt_hw_spin_lock_init, rt_hw_spin_lock,
rt_hw_spin_unlock, rt_hw_ipi_send, rt_hw_interrupt_set_priority, rt_hw_interrupt_get_priority,
rt_hw_ipi_init, rt_hw_ipi_handler_install, and rt_hw_ipi_handler. A rough sketch of this porting interface follows.
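
For orientation, the SMP porting surface covered by these names looks roughly like the prototypes below (an illustrative sketch following the usual RT-Thread SMP porting signatures; the authoritative definitions are in the diffs further down):

    /* Illustrative prototypes only -- see interrupt_gcc.S, cpuport.c and virt64/interrupt.c below. */
    rt_base_t rt_hw_local_irq_disable(void);                 /* returns the previous sstatus value      */
    void      rt_hw_local_irq_enable(rt_base_t level);       /* restores it                             */
    void      rt_hw_spin_lock_init(rt_hw_spinlock_t *lock);
    void      rt_hw_spin_lock(rt_hw_spinlock_t *lock);       /* ticket lock, see virt64/interrupt.c     */
    void      rt_hw_spin_unlock(rt_hw_spinlock_t *lock);
    void      rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask);
    void      rt_hw_ipi_handler_install(int ipi_vector, rt_isr_handler_t ipi_isr_handler);
    void      rt_hw_secondary_cpu_up(void);                  /* boot hart starts the others via SBI HSM */
    void      rt_hw_secondary_cpu_idle_exec(void);           /* idle hook executed on secondary harts   */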

2. In the two functions rt_hw_context_switch_to and rt_hw_context_switch in context_gcc.S,
add a call to rt_cpus_lock_status_restore to update the scheduler information; a pseudocode sketch of the resulting switch path follows.
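
Conceptually, the switch-to path under RT_USING_SMP now behaves like the C pseudocode below (a sketch only; the real code is the assembly in the context_gcc.S diff, and per the comments there lwp_aspace_switch is taken care of by rt_cpus_lock_status_restore in the SMP case; the helper names are purely illustrative):

    /* Pseudocode sketch of rt_hw_context_switch_to() with RT_USING_SMP -- not the actual implementation. */
    void sketch_context_switch_to(rt_ubase_t to_sp_slot, struct rt_thread *to_thread)
    {
        set_sp(load_from(to_sp_slot));            /* LOAD sp, (a0): pick up the target thread's stack   */
        rt_cpus_lock_status_restore(to_thread);   /* tell the scheduler which thread this CPU now runs  */
        restore_context_and_sret();               /* RESTORE_CONTEXT + sret                             */
    }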

3. If the MMU is enabled, use the .percpu section and record each hart's ID through dedicated
per-hart page tables; if the MMU is not enabled, record it directly in the satp register.
Additionally, add dynamic startup based on the configured number of cores. The .percpu section
is only used when both ARCH_MM_MMU and RT_USING_SMP are enabled, but since the linker script
currently has no macro guard around the section, some space is wasted in configurations that
do not need it. The sketch below illustrates the intended usage.
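
To illustrate how the .percpu area is meant to be consumed (the only real occupant in this patch is .percpu_hartid in startup_gcc.S; the variable below is hypothetical):

    /* Hypothetical per-hart variable placed in the cloned .percpu area. */
    __attribute__((section(".percpu"))) static rt_ubase_t percpu_counter;

    /* Each hart's early page table maps the .percpu virtual range onto that hart's own
     * physical copy, so the same virtual address resolves to hart-private storage.
     * Without the MMU (or when pvoff == 0), rt_hw_cpu_id() instead reads the hart ID
     * back from the low bits of satp, where the startup code parked it. */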

4. The QEMU machine started in CI has 128MB of physical memory, so RT_HW_PAGE_END is changed from
the original +256MB to +128MB. The SConscript under the common64 directory is also modified to add
common/atomic_riscv.c to the build. The annotated layout below recaps the affected board.h macros.
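
For reference, the resulting layout on the 128MB CI machine, shown with the surrounding board.h macros (an annotated copy for readability; VIRT64_SBI_MEMSZ is the RAM reserved for OpenSBI and its value comes from the BSP configuration):

    /* Annotated view of bsp/qemu-virt64-riscv/driver/board.h after this change. */
    #define RT_HW_HEAP_BEGIN ((void *)&__bss_end)                            /* kernel heap starts after .bss */
    #define RT_HW_HEAP_END   ((void *)(RT_HW_HEAP_BEGIN + 64 * 1024 * 1024)) /* 64MB kernel heap              */
    #define RT_HW_PAGE_START RT_HW_HEAP_END                                  /* page pool follows the heap    */
    #define RT_HW_PAGE_END   ((void *)(KERNEL_VADDR_START + (128 * 1024 * 1024 - VIRT64_SBI_MEMSZ)))
    /* The page pool must now end inside the 128MB of RAM that CI's QEMU instance provides. */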

Signed-off-by: Mengchen Teng <teng_mengchen@163.com>
Tm-C-mT committed 1 month ago
Commit acef64ed2a

+ 20 - 0
bsp/qemu-virt64-riscv/SConstruct

@@ -38,5 +38,25 @@ if GetDepend('__STACKSIZE__'): stack_size = GetDepend('__STACKSIZE__')
 stack_lds.write('__STACKSIZE__ = %d;\n' % stack_size)
 stack_lds.close()
 
+# Obtain the number of harts from rtconfig.h and write 
+# it into link_cpus.lds for the linker script
+try:
+    with open('rtconfig.h', 'r') as f:
+        rtconfig_content = f.readlines()
+except FileNotFoundError:
+    cpus_nr = 1
+else:
+    cpus_nr = 1  # default value
+    for line in rtconfig_content:
+        line = line.strip()
+        if line.startswith('#define') and 'RT_CPUS_NR' in line:
+            parts = line.split()
+            if len(parts) >= 3 and parts[2].isdigit():
+                cpus_nr = int(parts[2])
+                break 
+
+with open('link_cpus.lds', 'w') as cpus_lds:
+    cpus_lds.write(f'RT_CPUS_NR = {cpus_nr};\n')
+
 # make a building
 DoBuilding(TARGET, objs)
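
The block above searches rtconfig.h for a Kconfig-generated definition of roughly this shape (illustrative value; the real number comes from the BSP's Kconfig settings):

    /* rtconfig.h excerpt (generated by menuconfig) -- example only */
    #define RT_USING_SMP
    #define RT_CPUS_NR 4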

+ 9 - 0
bsp/qemu-virt64-riscv/driver/board.c

@@ -24,6 +24,10 @@
 #include "plic.h"
 #include "stack.h"
 
+#ifdef RT_USING_SMP
+#include "interrupt.h"
+#endif /* RT_USING_SMP */
+
 #ifdef RT_USING_SMART
 #include "riscv_mmu.h"
 #include "mmu.h"
@@ -89,6 +93,11 @@ void rt_hw_board_init(void)
 
     rt_hw_tick_init();
 
+#ifdef RT_USING_SMP
+    /* ipi init */
+    rt_hw_ipi_init();
+#endif /* RT_USING_SMP */
+
 #ifdef RT_USING_COMPONENTS_INIT
     rt_components_board_init();
 #endif

+ 1 - 1
bsp/qemu-virt64-riscv/driver/board.h

@@ -25,7 +25,7 @@ extern unsigned int __bss_end;
 #define RT_HW_HEAP_BEGIN ((void *)&__bss_end)
 #define RT_HW_HEAP_END   ((void *)(RT_HW_HEAP_BEGIN + 64 * 1024 * 1024))
 #define RT_HW_PAGE_START RT_HW_HEAP_END
-#define RT_HW_PAGE_END   ((void *)(KERNEL_VADDR_START + (256 * 1024 * 1024 - VIRT64_SBI_MEMSZ)))
+#define RT_HW_PAGE_END   ((void *)(KERNEL_VADDR_START + (128 * 1024 * 1024 - VIRT64_SBI_MEMSZ)))
 
 void rt_hw_board_init(void);
 void rt_init_user_mem(struct rt_thread *thread, const char *name,

+ 22 - 6
bsp/qemu-virt64-riscv/link.lds

@@ -9,6 +9,7 @@
  */
 
 INCLUDE "link_stacksize.lds"
+INCLUDE "link_cpus.lds"
 
 OUTPUT_ARCH( "riscv" )
 
@@ -121,12 +122,9 @@ SECTIONS
     {
         . = ALIGN(64);
         __stack_start__ = .;
-
-        . += __STACKSIZE__;
-        __stack_cpu0 = .;
-
-        . += __STACKSIZE__;
-        __stack_cpu1 = .;
+        /* Dynamically allocate stack areas according to RT_CPUS_NR */
+        . += (__STACKSIZE__ * RT_CPUS_NR);
+        __stack_end__ = .;
     } > SRAM
 
     .sbss :
@@ -138,6 +136,24 @@ SECTIONS
         *(.scommon)
     } > SRAM
 
+    .percpu (NOLOAD) :
+    {
+        /* 2MB Align for MMU early map */
+        . = ALIGN(0x200000);
+        PROVIDE(__percpu_start = .);
+
+        *(.percpu)
+
+        /* 2MB Align for MMU early map */
+        . = ALIGN(0x200000);
+
+        PROVIDE(__percpu_end = .);
+
+        /* Clone the area */
+        . = __percpu_end + (__percpu_end - __percpu_start) * (RT_CPUS_NR - 1);
+        PROVIDE(__percpu_real_end = .);
+    } > SRAM
+    
     .bss :
     {
         *(.bss)

+ 1 - 0
bsp/qemu-virt64-riscv/link_cpus.lds

@@ -0,0 +1 @@
+RT_CPUS_NR = 8;

+ 22 - 6
bsp/qemu-virt64-riscv/link_smart.lds

@@ -9,6 +9,7 @@
  */
 
 INCLUDE "link_stacksize.lds"
+INCLUDE "link_cpus.lds"
 
 OUTPUT_ARCH( "riscv" )
 
@@ -122,12 +123,9 @@ SECTIONS
     {
         . = ALIGN(64);
         __stack_start__ = .;
-
-        . += __STACKSIZE__;
-        __stack_cpu0 = .;
-
-        . += __STACKSIZE__;
-        __stack_cpu1 = .;
+        /* Dynamically allocate stack areas according to RT_CPUS_NR */
+        . += (__STACKSIZE__ * RT_CPUS_NR);
+        __stack_end__ = .;
     } > SRAM
 
     .sbss :
@@ -139,6 +137,24 @@ SECTIONS
         *(.scommon)
     } > SRAM
 
+    .percpu (NOLOAD) :
+    {
+        /* 2MB Align for MMU early map */
+        . = ALIGN(0x200000);
+        PROVIDE(__percpu_start = .);
+
+        *(.percpu)
+
+        /* 2MB Align for MMU early map */
+        . = ALIGN(0x200000);
+
+        PROVIDE(__percpu_end = .);
+
+        /* Clone the area */
+        . = __percpu_end + (__percpu_end - __percpu_start) * (RT_CPUS_NR - 1);
+        PROVIDE(__percpu_real_end = .);
+    } > SRAM
+
     .bss :
     {
         *(.bss)

+ 14 - 2
bsp/qemu-virt64-riscv/qemu-dbg.sh

@@ -1,4 +1,16 @@
-qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin -s -S \
+QEMU_CMD="qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin -s -S"
+
+if grep -q "#define RT_USING_SMP" ./rtconfig.h 2>/dev/null; then
+    hart_num=$(grep "RT_CPUS_NR = [0-9]*;" ./link_cpus.lds | awk -F'[=;]' '{gsub(/ /, "", $2); print $2}')
+    if [ -z "$hart_num" ]; then
+        hart_num=1
+    fi
+    QEMU_CMD="$QEMU_CMD -smp $hart_num"
+fi
+
+QEMU_CMD="$QEMU_CMD \
 -drive if=none,file=sd.bin,format=raw,id=blk0 -device virtio-blk-device,drive=blk0,bus=virtio-mmio-bus.0 \
 -netdev user,id=tap0 -device virtio-net-device,netdev=tap0,bus=virtio-mmio-bus.1 \
--device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0
+-device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0"
+
+eval $QEMU_CMD

+ 15 - 2
bsp/qemu-virt64-riscv/run.sh

@@ -24,7 +24,20 @@ if [ ! -f $path_image ]; then
 	exit
 fi
 
-qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin \
+QEMU_CMD="qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin"
+
+if grep -q "#define RT_USING_SMP" ./rtconfig.h 2>/dev/null; then
+    hart_num=$(grep "RT_CPUS_NR = [0-9]*;" ./link_cpus.lds 2>/dev/null | awk -F'[=;]' '{gsub(/ /, "", $2); print $2}')
+    if [ -z "$hart_num" ] || [ "$hart_num" -lt 1 ]; then
+        echo "Warning: Invalid or missing RT_CPUS_NR, defaulting to 1"
+        hart_num=1
+    fi
+    QEMU_CMD="$QEMU_CMD -smp $hart_num"
+fi
+
+QEMU_CMD="$QEMU_CMD \
 -drive if=none,file=$path_image,format=raw,id=blk0 -device virtio-blk-device,drive=blk0,bus=virtio-mmio-bus.0 \
 -netdev user,id=tap0 -device virtio-net-device,netdev=tap0,bus=virtio-mmio-bus.1 \
--device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0
+-device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0"
+
+eval $QEMU_CMD

+ 2 - 0
libcpu/risc-v/common64/SConscript

@@ -7,6 +7,8 @@ CPPPATH = [cwd]
 if not GetDepend('ARCH_USING_ASID'):
     SrcRemove(src, ['asid.c'])
 
+src.append('../common/atomic_riscv.c')
+
 group = DefineGroup('CPU', src, depend = [''], CPPPATH = CPPPATH)
 
 Return('group')

+ 31 - 4
libcpu/risc-v/common64/context_gcc.S

@@ -69,29 +69,47 @@
 .endm
 
 /*
+ * #ifdef RT_USING_SMP
+ * void rt_hw_context_switch_to(rt_ubase_t to, struct rt_thread *to_thread);
+ * #else
  * void rt_hw_context_switch_to(rt_ubase_t to);
- *
- * a0 --> to SP pointer
+ * #endif
+ * a0 --> to
+ * a1 --> to_thread
  */
 .globl rt_hw_context_switch_to
 rt_hw_context_switch_to:
     LOAD sp, (a0)
 
+#ifdef RT_USING_SMP
+    /* Hand the incoming thread to rt_cpus_lock_status_restore so the CPU lock status can be restored */
+    mv   a0,   a1
+    call  rt_cpus_lock_status_restore
+#endif
+
     call rt_thread_self
     mv s1, a0
 
+#ifndef RT_USING_SMP
+    // if RT_USING_SMP is enabled, lwp_aspace_switch is handled by rt_cpus_lock_status_restore.
     #ifdef RT_USING_SMART
-        call lwp_aspace_switch
+      call lwp_aspace_switch
     #endif
+#endif
 
     RESTORE_CONTEXT
     sret
 
 /*
+ * #ifdef RT_USING_SMP
+ * void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to, struct rt_thread *to_thread);
+ * #else
  * void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to);
+ * #endif
  *
  * a0 --> from SP pointer
  * a1 --> to SP pointer
+ * a2 --> to_thread
  *
  * It should only be used on local interrupt disable
  */
@@ -103,13 +121,22 @@ rt_hw_context_switch:
     // restore to thread SP
     LOAD sp, (a1)
 
+#ifdef RT_USING_SMP
+    /* Hand the incoming thread to rt_cpus_lock_status_restore so the CPU lock status can be restored */
+    mv   a0,   a2
+    call  rt_cpus_lock_status_restore
+#endif /*RT_USING_SMP*/
+
     // restore Address Space
     call rt_thread_self
     mv s1, a0
 
+#ifndef RT_USING_SMP
+    // if RT_USING_SMP is enabled, lwp_aspace_switch is handled by rt_cpus_lock_status_restore.
     #ifdef RT_USING_SMART
-        call lwp_aspace_switch
+       call lwp_aspace_switch
     #endif
+#endif
 
     RESTORE_CONTEXT
     sret

+ 121 - 6
libcpu/risc-v/common64/cpuport.c

@@ -18,16 +18,25 @@
 #include <sbi.h>
 #include <encoding.h>
 
+#ifdef ARCH_MM_MMU
+#include "mmu.h"
+#endif
+
+#ifdef RT_USING_SMP
+#include "tick.h"
+#include "interrupt.h"
+#endif /* RT_USING_SMP */
+
 #ifdef ARCH_RISCV_FPU
-    #define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM | SSTATUS_FS)
+#define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM | SSTATUS_FS)
 #else
-    #define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM)
+#define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM)
 #endif
 
 #ifdef ARCH_RISCV_VECTOR
-    #define K_SSTATUS_DEFAULT (K_SSTATUS_DEFAULT_BASE | SSTATUS_VS)
+#define K_SSTATUS_DEFAULT (K_SSTATUS_DEFAULT_BASE | SSTATUS_VS)
 #else
-    #define K_SSTATUS_DEFAULT K_SSTATUS_DEFAULT_BASE
+#define K_SSTATUS_DEFAULT K_SSTATUS_DEFAULT_BASE
 #endif
 #ifdef RT_USING_SMART
 #include <lwp_arch.h>
@@ -49,10 +58,13 @@ volatile rt_ubase_t rt_interrupt_to_thread = 0;
  */
 volatile rt_ubase_t rt_thread_switch_interrupt_flag = 0;
 
+#ifdef ARCH_MM_MMU
+static rt_ubase_t *percpu_hartid;
+#endif
+
 void *_rt_hw_stack_init(rt_ubase_t *sp, rt_ubase_t ra, rt_ubase_t sstatus)
 {
-    rt_hw_switch_frame_t frame = (rt_hw_switch_frame_t)
-        ((rt_ubase_t)sp - sizeof(struct rt_hw_switch_frame));
+    rt_hw_switch_frame_t frame = (rt_hw_switch_frame_t)((rt_ubase_t)sp - sizeof(struct rt_hw_switch_frame));
 
     rt_memset(frame, 0, sizeof(struct rt_hw_switch_frame));
 
@@ -64,7 +76,21 @@ void *_rt_hw_stack_init(rt_ubase_t *sp, rt_ubase_t ra, rt_ubase_t sstatus)
 
 int rt_hw_cpu_id(void)
 {
+#ifndef RT_USING_SMP
     return 0;
+#else
+    if (rt_kmem_pvoff() != 0)
+    {
+        return *percpu_hartid;
+    }
+    else
+    {
+        // if the MMU is not enabled or pvoff == 0, read the hartid from the satp register
+        rt_ubase_t hartid;
+        asm volatile("csrr %0, satp" : "=r"(hartid));
+        return hartid & 0xFFFF;  // Assuming hartid fits in lower 16 bits
+    }
+#endif /* RT_USING_SMP */
 }
 
 /**
@@ -117,6 +143,18 @@ void rt_hw_context_switch_interrupt(rt_ubase_t from, rt_ubase_t to, rt_thread_t
 
     return;
 }
+#else
+void rt_hw_context_switch_interrupt(void *context, rt_ubase_t from, rt_ubase_t to, struct rt_thread *to_thread)
+{
+    /* Perform architecture-specific context switch. This call will
+     * restore the target thread context and should not return when a
+     * switch is performed. The caller (scheduler) invoked this function
+     * in a context where local IRQs are disabled. */
+    rt_uint32_t level;
+    level = rt_hw_local_irq_disable();
+    rt_hw_context_switch((rt_ubase_t)from, (rt_ubase_t)to, to_thread);
+    rt_hw_local_irq_enable(level);
+}
 #endif /* end of RT_USING_SMP */
 
 /** shutdown CPU */
@@ -137,3 +175,80 @@ void rt_hw_set_process_id(int pid)
 {
     // TODO
 }
+
+#ifdef RT_USING_SMP
+extern void _start(void);
+extern int boot_hartid;
+/* Boot secondary harts using the SBI HSM hart_start call. */
+void rt_hw_secondary_cpu_up(void)
+{
+    rt_uint64_t entry_pa;
+    int hart, ret;
+
+    /* translate kernel virtual _start to physical address. */
+#ifdef ARCH_MM_MMU
+    if (rt_kmem_pvoff() != 0)
+    {
+        entry_pa = (rt_uint64_t)rt_kmem_v2p(&_start);
+    }
+    else
+    {
+        entry_pa = (rt_uint64_t)&_start;
+    }
+#else
+    entry_pa = (rt_uint64_t)&_start;
+#endif /* ARCH_MM_MMU */
+    /* Assumes hart IDs are in range [0, RT_CPUS_NR) */
+    RT_ASSERT(boot_hartid < RT_CPUS_NR);
+    for (hart = 0; hart < RT_CPUS_NR; hart++)
+    {
+        if (hart == boot_hartid)
+            continue;
+
+        ret = sbi_hsm_hart_start((unsigned long)hart,
+                                 (unsigned long)entry_pa,
+                                 0UL);
+        if (ret)
+        {
+            rt_kprintf("sbi_hsm_hart_start failed for hart %d: %d\n", hart, ret);
+        }
+    }
+}
+
+#ifdef ARCH_MM_MMU
+void rt_hw_percpu_hartid_init(rt_ubase_t *percpu_ptr, rt_ubase_t hartid)
+{
+    RT_ASSERT(hartid < RT_CPUS_NR);
+    rt_ubase_t *percpu_hartid_paddr;
+    rt_size_t percpu_size = (rt_size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);
+
+    percpu_hartid = percpu_ptr;
+
+    // from virtual address to physical address
+    percpu_ptr = (rt_ubase_t *)((rt_ubase_t)percpu_ptr + (rt_ubase_t)rt_kmem_pvoff());
+    percpu_hartid_paddr = percpu_ptr;
+
+    /* Save to the real area */
+    *(rt_ubase_t *)((void *)percpu_hartid_paddr + hartid * percpu_size) = hartid;
+}
+#endif /* ARCH_MM_MMU */
+
+void secondary_cpu_entry(void)
+{
+
+#ifdef RT_USING_SMART
+    /* switch to kernel address space */
+    rt_hw_aspace_switch(&rt_kernel_space);
+#endif
+    /* The PLIC peripheral interrupts are currently handled by the boot_hart. */
+    /* Enable the Supervisor-Timer bit in SIE */
+    rt_hw_tick_init();
+
+    /* ipi init */
+    rt_hw_ipi_init();
+
+    rt_hw_spin_lock(&_cpus_lock);
+    /* invoke system scheduler start for secondary CPU */
+    rt_system_scheduler_start();
+}
+#endif /* RT_USING_SMP */

+ 4 - 0
libcpu/risc-v/common64/cpuport.h

@@ -43,6 +43,10 @@ rt_inline void rt_hw_isb(void)
     __asm__ volatile(OPC_FENCE_I:::"memory");
 }
 
+#ifdef ARCH_MM_MMU
+void rt_hw_percpu_hartid_init(rt_ubase_t *percpu_ptr, rt_ubase_t hartid);
+#endif
+
 #endif
 
 #endif

+ 2 - 0
libcpu/risc-v/common64/encoding.h

@@ -176,6 +176,8 @@
 #define PTE_A     0x040 // Accessed
 #define PTE_D     0x080 // Dirty
 #define PTE_SOFT  0x300 // Reserved for Software
+#define PTE_ATTR_RW         (PTE_R | PTE_W)
+#define PTE_ATTR_RWX        (PTE_ATTR_RW | PTE_X)
 
 #define PTE_PPN_SHIFT 10
 

+ 23 - 0
libcpu/risc-v/common64/interrupt_gcc.S

@@ -60,10 +60,17 @@ _handle_interrupt_and_exception:
     call    handle_trap
 
 _interrupt_exit:
+#ifndef RT_USING_SMP
     la      s0, rt_thread_switch_interrupt_flag
     lw      s2, 0(s0)
     beqz    s2, _resume_execution
     sw      zero, 0(s0)
+#else
+    mv      a0, sp
+    call    rt_scheduler_do_irq_switch
+    // if no switch was performed, fall through to _resume_execution
+    j       _resume_execution
+#endif /* RT_USING_SMP */
 
 _context_switch:
     la      t0, rt_interrupt_from_thread
@@ -88,6 +95,7 @@ _resume_kernel:
     csrw    sscratch, zero
     sret
 
+#ifndef RT_USING_SMP
 .global rt_hw_interrupt_enable
 rt_hw_interrupt_enable:
     csrs sstatus, a0    /* restore to old csr */
@@ -97,3 +105,18 @@ rt_hw_interrupt_enable:
 rt_hw_interrupt_disable:
     csrrci a0, sstatus, 2   /* clear SIE */
     jr ra
+#else
+.global rt_hw_local_irq_disable
+rt_hw_local_irq_disable:
+    csrrci a0, sstatus, 2
+    jr ra
+
+.global rt_hw_local_irq_enable
+rt_hw_local_irq_enable:
+    csrs sstatus, a0
+    jr ra
+
+.global rt_hw_secondary_cpu_idle_exec
+rt_hw_secondary_cpu_idle_exec:
+    jr ra
+#endif  /* RT_USING_SMP */

+ 299 - 63
libcpu/risc-v/common64/mmu.c

@@ -38,10 +38,21 @@
 
 static size_t _unmap_area(struct rt_aspace *aspace, void *v_addr);
 
+/* Define the structure of early page table */
+struct page_table
+{
+    unsigned long page[ARCH_PAGE_SIZE / sizeof(unsigned long)];
+};
+static struct page_table *__init_page_array;
+
+#ifndef RT_USING_SMP
 static void *current_mmu_table = RT_NULL;
+#else
+static void *current_mmu_table[RT_CPUS_NR] = { RT_NULL };
+#endif /* RT_USING_SMP */
 
 volatile __attribute__((aligned(4 * 1024)))
-rt_ubase_t MMUTable[__SIZE(VPN2_BIT)];
+rt_ubase_t MMUTable[__SIZE(VPN2_BIT) * RT_CPUS_NR];
 
 /**
  * @brief Switch the current address space to the specified one.
@@ -69,8 +80,15 @@ void rt_hw_aspace_switch(rt_aspace_t aspace)
 #else /* !ARCH_USING_ASID */
 void rt_hw_aspace_switch(rt_aspace_t aspace)
 {
-    uintptr_t page_table = (uintptr_t)rt_kmem_v2p(aspace->page_table);
+    // It is necessary to find the MMU page table specific to each core.
+    uint32_t hartid = rt_cpu_get_id();
+    uintptr_t ptr = (uintptr_t)aspace->page_table + (uintptr_t)(hartid * ARCH_PAGE_SIZE);
+    uintptr_t page_table = (uintptr_t)rt_kmem_v2p((void *)ptr);
+#ifndef RT_USING_SMP
     current_mmu_table = aspace->page_table;
+#else
+    current_mmu_table[rt_hw_cpu_id()] = (void *)ptr;
+#endif
 
     write_csr(satp, (((size_t)SATP_MODE) << SATP_MODE_OFFSET) |
                         ((rt_ubase_t)page_table >> PAGE_OFFSET_BIT));
@@ -85,7 +103,11 @@ void rt_hw_asid_init(void)
 /* get current page table. */
 void *rt_hw_mmu_tbl_get()
 {
+#ifndef RT_USING_SMP
     return current_mmu_table;
+#else
+    return current_mmu_table[rt_hw_cpu_id()];
+#endif /* RT_USING_SMP */
 }
 
 /* Map a single virtual address page to a physical address page in the page table. */
@@ -98,65 +120,152 @@ static int _map_one_page(struct rt_aspace *aspace, void *va, void *pa,
     l1_off = GET_L1((size_t)va);
     l2_off = GET_L2((size_t)va);
     l3_off = GET_L3((size_t)va);
+    /* Create a separate page table for each hart to facilitate access to the .percpu section. */
+    for (int hartid = 0; hartid < RT_CPUS_NR; hartid++)
+    {
+        mmu_l1 = (rt_ubase_t *)((rt_ubase_t)aspace->page_table + (rt_ubase_t)(hartid * ARCH_PAGE_SIZE)) + l1_off;
 
-    mmu_l1 = ((rt_ubase_t *)aspace->page_table) + l1_off;
+        if (PTE_USED(*mmu_l1))
+        {
+            mmu_l2 = (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*mmu_l1), PV_OFFSET);
+        }
+        else
+        {
+            mmu_l2 = (rt_ubase_t *)rt_pages_alloc(0);
 
-    if (PTE_USED(*mmu_l1))
-    {
-        mmu_l2 = (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*mmu_l1), PV_OFFSET);
-    }
-    else
-    {
-        mmu_l2 = (rt_ubase_t *)rt_pages_alloc(0);
+            if (mmu_l2)
+            {
+                rt_memset(mmu_l2, 0, PAGE_SIZE);
+                rt_hw_cpu_dcache_clean(mmu_l2, PAGE_SIZE);
+                *mmu_l1 = COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l2, PV_OFFSET),
+                                    PAGE_DEFAULT_ATTR_NEXT);
+                rt_hw_cpu_dcache_clean(mmu_l1, sizeof(*mmu_l1));
+            }
+            else
+            {
+                return -1;
+            }
+        }
 
-        if (mmu_l2)
+        if (PTE_USED(*(mmu_l2 + l2_off)))
         {
-            rt_memset(mmu_l2, 0, PAGE_SIZE);
-            rt_hw_cpu_dcache_clean(mmu_l2, PAGE_SIZE);
-            *mmu_l1 = COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l2, PV_OFFSET),
-                                 PAGE_DEFAULT_ATTR_NEXT);
-            rt_hw_cpu_dcache_clean(mmu_l1, sizeof(*mmu_l1));
+            RT_ASSERT(!PAGE_IS_LEAF(*(mmu_l2 + l2_off)));
+            mmu_l3 =
+                (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*(mmu_l2 + l2_off)), PV_OFFSET);
         }
         else
         {
-            return -1;
+            mmu_l3 = (rt_ubase_t *)rt_pages_alloc(0);
+
+            if (mmu_l3)
+            {
+                rt_memset(mmu_l3, 0, PAGE_SIZE);
+                rt_hw_cpu_dcache_clean(mmu_l3, PAGE_SIZE);
+                *(mmu_l2 + l2_off) =
+                    COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l3, PV_OFFSET),
+                            PAGE_DEFAULT_ATTR_NEXT);
+                rt_hw_cpu_dcache_clean(mmu_l2, sizeof(*mmu_l2));
+                /* declares a reference to parent page table */
+                rt_page_ref_inc((void *)mmu_l2, 0);
+            }
+            else
+            {
+                return -1;
+            }
         }
-    }
 
-    if (PTE_USED(*(mmu_l2 + l2_off)))
-    {
-        RT_ASSERT(!PAGE_IS_LEAF(*(mmu_l2 + l2_off)));
-        mmu_l3 =
-            (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*(mmu_l2 + l2_off)), PV_OFFSET);
+        RT_ASSERT(!PTE_USED(*(mmu_l3 + l3_off)));
+        /* declares a reference to parent page table */
+        rt_page_ref_inc((void *)mmu_l3, 0);
+        *(mmu_l3 + l3_off) = COMBINEPTE((rt_ubase_t)pa, attr);
+        rt_hw_cpu_dcache_clean(mmu_l3 + l3_off, sizeof(*(mmu_l3 + l3_off)));
     }
-    else
-    {
-        mmu_l3 = (rt_ubase_t *)rt_pages_alloc(0);
 
-        if (mmu_l3)
+    return 0;
+}
+
+#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
+static int _map_percpu_area(rt_ubase_t *table, void *va, void *pa, int cpu_id)
+{
+    unsigned long page;
+    rt_ubase_t off, level_shift;
+
+    level_shift = PPN2_SHIFT;
+
+    // map pages - 4KB
+    for (int level = 0; level < 2; ++level)
+    {
+        off = ((rt_ubase_t)va >> level_shift) & VPN_MASK;
+        if (table[off] & PTE_V)
         {
-            rt_memset(mmu_l3, 0, PAGE_SIZE);
-            rt_hw_cpu_dcache_clean(mmu_l3, PAGE_SIZE);
-            *(mmu_l2 + l2_off) =
-                COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l3, PV_OFFSET),
-                           PAGE_DEFAULT_ATTR_NEXT);
-            rt_hw_cpu_dcache_clean(mmu_l2, sizeof(*mmu_l2));
-            /* declares a reference to parent page table */
-            rt_page_ref_inc((void *)mmu_l2, 0);
+            /* Step into the next level page table */
+            table = (unsigned long *)((table[off] >> PTE_BITS) << ARCH_PAGE_SHIFT);
+            level_shift -= VPN_BITS;
+            continue;
         }
-        else
+        if (!(page = get_free_page()))
         {
-            return -1;
+            return MMU_MAP_ERROR_NOPAGE;
         }
+        rt_memset((void *)page, 0, ARCH_PAGE_SIZE);
+        table[off] = ((page >> ARCH_PAGE_SHIFT) << PTE_BITS) | PTE_V;
+
+        rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, table + off, sizeof(void *));
+
+        /* Step into the next level page table */
+        table = (unsigned long *)((table[off] >> PTE_BITS) << ARCH_PAGE_SHIFT);
+
+        level_shift -= VPN_BITS;
     }
 
-    RT_ASSERT(!PTE_USED(*(mmu_l3 + l3_off)));
-    /* declares a reference to parent page table */
-    rt_page_ref_inc((void *)mmu_l3, 0);
-    *(mmu_l3 + l3_off) = COMBINEPTE((rt_ubase_t)pa, attr);
-    rt_hw_cpu_dcache_clean(mmu_l3 + l3_off, sizeof(*(mmu_l3 + l3_off)));
-    return 0;
+    off = ((rt_ubase_t)va >> level_shift) & VPN_MASK;
+    table[off] = (((rt_ubase_t)pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | MMU_MAP_K_RWCB;
+
+    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, table + off, sizeof(void *));
+
+    return ARCH_PAGE_SIZE;
+}
+
+// Ensure that the .percpu section is mapped to each core's own physical copy.
+static void rt_hw_percpu_mmu_init_check(void)
+{
+    size_t mapped, size;
+    void *page_table, *vaddr, *paddr;
+    static rt_bool_t inited = RT_FALSE;
+
+    if (inited)
+    {
+        return;
+    }
+    inited = RT_TRUE;
+
+    page_table = rt_kernel_space.page_table;
+
+    for (int hartid = 0; hartid < RT_CPUS_NR; ++hartid)
+    {
+        vaddr = &__percpu_start;
+        paddr = vaddr + rt_kmem_pvoff();
+        size = (size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);
+        /* Offset to per-CPU partition for current CPU */
+        paddr += size * hartid;
+
+        while (size > 0)
+        {
+            MM_PGTBL_LOCK(&rt_kernel_space);
+            mapped = _map_percpu_area(page_table, vaddr, paddr, hartid);
+            MM_PGTBL_UNLOCK(&rt_kernel_space);
+
+            RT_ASSERT(mapped > 0);
+
+            size -= mapped;
+            vaddr += mapped;
+            paddr += mapped;
+        }
+
+        page_table += ARCH_PAGE_SIZE;
+    }
 }
+#endif /* RT_USING_SMP && ARCH_MM_MMU */
 
 /**
  * @brief Maps a virtual address space to a physical address space.
@@ -185,24 +294,35 @@ void *rt_hw_mmu_map(struct rt_aspace *aspace, void *v_addr, void *p_addr,
     int ret = -1;
     void *unmap_va = v_addr;
     size_t npages = size >> ARCH_PAGE_SHIFT;
+#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
+    // Map the memory of the .percpu section separately for each core.
+    rt_hw_percpu_mmu_init_check();
+#endif
 
     /* TODO trying with HUGEPAGE here */
     while (npages--)
     {
-        MM_PGTBL_LOCK(aspace);
-        ret = _map_one_page(aspace, v_addr, p_addr, attr);
-        MM_PGTBL_UNLOCK(aspace);
-        if (ret != 0)
+#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
+        // skip mapping .percpu section pages
+        if (v_addr < (void *)&__percpu_start ||
+            v_addr >= (void *)&__percpu_end)
+#endif
         {
-            /* error, undo map */
-            while (unmap_va != v_addr)
+            MM_PGTBL_LOCK(aspace);
+            ret = _map_one_page(aspace, v_addr, p_addr, attr);
+            MM_PGTBL_UNLOCK(aspace);
+            if (ret != 0)
             {
-                MM_PGTBL_LOCK(aspace);
-                _unmap_area(aspace, unmap_va);
-                MM_PGTBL_UNLOCK(aspace);
-                unmap_va += ARCH_PAGE_SIZE;
+                /* error, undo map */
+                while (unmap_va != v_addr)
+                {
+                    MM_PGTBL_LOCK(aspace);
+                    _unmap_area(aspace, unmap_va);
+                    MM_PGTBL_UNLOCK(aspace);
+                    unmap_va += ARCH_PAGE_SIZE;
+                }
+                break;
             }
-            break;
         }
         v_addr += ARCH_PAGE_SIZE;
         p_addr += ARCH_PAGE_SIZE;
@@ -216,6 +336,99 @@ void *rt_hw_mmu_map(struct rt_aspace *aspace, void *v_addr, void *p_addr,
     return NULL;
 }
 
+#ifdef ARCH_MM_MMU
+void set_free_page(void *page_array)
+{
+    __init_page_array = page_array;
+}
+
+// Early-stage page allocator
+unsigned long get_free_page(void)
+{
+    static rt_atomic_t page_off = 0;
+
+    rt_atomic_t old_off = rt_hw_atomic_add(&page_off, 1);
+    if (old_off < ARCH_PAGE_SIZE / sizeof(unsigned long))
+    {
+        return (unsigned long)(__init_page_array[old_off].page);
+    }
+
+    return 0;
+}
+
+#ifdef RT_USING_SMP
+// Perform early mapping for the .percpu section
+static int rt_hw_mmu_map_percpu_early(rt_ubase_t *tbl, rt_ubase_t va, rt_ubase_t pa)
+{
+    unsigned long page;
+    rt_ubase_t off, level_shift;
+
+    level_shift = PPN2_SHIFT;
+
+    // page size 2MB
+    off = (va >> level_shift) & VPN_MASK;
+    // Step into the next level page table
+    tbl = (rt_ubase_t *)((tbl[off] >> PTE_BITS) << ARCH_PAGE_SHIFT);
+    level_shift -= VPN_BITS;
+
+    off = (va >> level_shift) & VPN_MASK;
+    tbl[off] = ((pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | MMU_MAP_K_RWCB;
+
+    asm volatile("sfence.vma x0, x0");
+    return 0;
+}
+#endif /* RT_USING_SMP */
+
+static int rt_hw_mmu_map_early(rt_ubase_t *tbl, rt_ubase_t va, rt_ubase_t pa,
+                               rt_ubase_t attr)
+{
+    unsigned long page, *table;
+    rt_ubase_t off, level_shift;
+
+    if ((va & (L2_PAGE_SIZE - 1)) || (pa & (L2_PAGE_SIZE - 1)))
+    {
+        return MMU_MAP_ERROR_VANOTALIGN;
+    }
+
+    table = tbl;
+    level_shift = PPN2_SHIFT;
+
+    // page size 2MB
+    for (int level = 0; level < 1; ++level)
+    {
+        off = (va >> level_shift) & VPN_MASK;
+
+        if (!(table[off] & PTE_V))
+        {
+            if (!(page = get_free_page()))
+            {
+                return MMU_MAP_ERROR_NOPAGE;
+            }
+
+            rt_memset((void *)page, 0, ARCH_PAGE_SIZE);
+            table[off] = ((page >> ARCH_PAGE_SHIFT) << PTE_PPN_SHIFT) | PTE_V;
+        }
+
+        if ((table[off] & PTE_ATTR_RWX) != 0)
+        {
+            /* Not a next-level table (leaf mapping already present) */
+            return MMU_MAP_ERROR_CONFLICT;
+        }
+
+        /* Step into the next level page table */
+        page = (table[off] >> PTE_BITS) << ARCH_PAGE_SHIFT;
+        table = (unsigned long *)page;
+
+        level_shift -= VPN_BITS;
+    }
+
+    off = (va >> level_shift) & VPN_MASK;
+    table[off] = ((pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | attr;
+
+    return 0;
+}
+#endif
+
 /* unmap page table entry */
 static void _unmap_pte(rt_ubase_t *pentry, rt_ubase_t *lvl_entry[], int level)
 {
@@ -456,7 +669,7 @@ static rt_ubase_t *_query(struct rt_aspace *aspace, void *vaddr, int *level)
         return RT_NULL;
     }
 
-    mmu_l1 = ((rt_ubase_t *)aspace->page_table) + l1_off;
+    mmu_l1 = ((rt_ubase_t *)((rt_ubase_t)aspace->page_table + (rt_ubase_t)(rt_hw_cpu_id() * ARCH_PAGE_SIZE))) + l1_off;
 
     if (PTE_USED(*mmu_l1))
     {
@@ -648,8 +861,6 @@ void rt_hw_mmu_setup(rt_aspace_t aspace, struct mem_desc *mdesc, int desc_nr)
 
 #define SATP_BASE ((rt_ubase_t)SATP_MODE << SATP_MODE_OFFSET)
 
-extern unsigned int __bss_end;
-
 /**
  * @brief Early memory setup function for hardware initialization.
  *
@@ -662,12 +873,12 @@ extern unsigned int __bss_end;
  *       before the memory management system is fully operational.
  *       Here the identity mapping is implemented by a 1-stage page table, whose page size is 1GB.
  */
-void rt_hw_mem_setup_early(void)
+void rt_hw_mem_setup_early(void *pgtbl, rt_uint64_t hartid)
 {
-    rt_ubase_t pv_off;
+    rt_ubase_t pv_off, size;
     rt_ubase_t ps = 0x0;
     rt_ubase_t vs = 0x0;
-    rt_ubase_t *early_pgtbl = (rt_ubase_t *)(((size_t)&__bss_end + 4095) & ~0xfff);
+    rt_ubase_t *early_pgtbl = (rt_ubase_t *)(pgtbl + hartid * ARCH_PAGE_SIZE);
 
     /* calculate pv_offset */
     void *symb_pc;
@@ -705,14 +916,39 @@ void rt_hw_mem_setup_early(void)
         vs = ps - pv_off;
 
         /* relocate region */
-        rt_ubase_t vs_idx = GET_L1(vs);
-        rt_ubase_t ve_idx = GET_L1(vs + 0x80000000);
-        for (size_t i = vs_idx; i < ve_idx; i++)
+        rt_ubase_t ve = vs + 0x80000000;
+#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
+        while (vs < ve)
+        {
+            rt_hw_mmu_map_early(early_pgtbl, vs, ps, MMU_MAP_EARLY);
+            vs += L2_PAGE_SIZE;
+            ps += L2_PAGE_SIZE;
+        }
+#else
+        for (int i = GET_L1(vs); i < GET_L1(ve); i++)
         {
             early_pgtbl[i] = COMBINEPTE(ps, MMU_MAP_EARLY);
             ps += L1_PAGE_SIZE;
         }
+#endif
+
+#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
+        // map .percpu section
+        ps = (rt_ubase_t)&__percpu_start;
+        vs = ps - rt_kmem_pvoff();
+        size = (rt_size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);
+        /* Offset to per-CPU partition for current CPU */
+        ps += hartid * size;
+        ve = vs + size;
 
+        while (vs < ve)
+        {
+            /* Map physical address per-CPU partition */
+            rt_hw_mmu_map_percpu_early(early_pgtbl, vs, ps);
+            ps += L2_PAGE_SIZE;
+            vs += L2_PAGE_SIZE;
+        }
+#endif
         /* apply new mapping */
         asm volatile("sfence.vma x0, x0");
         write_csr(satp, SATP_BASE | ((size_t)early_pgtbl >> PAGE_OFFSET_BIT));

+ 9 - 0
libcpu/risc-v/common64/mmu.h

@@ -56,6 +56,14 @@ struct mem_desc
 #define MMU_MAP_ERROR_NOPAGE     -3
 #define MMU_MAP_ERROR_CONFLICT   -4
 
+#define VPN_MASK    0x1ffUL
+#define PTE_BITS    10
+#define VPN_BITS    9
+
+#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
+extern unsigned int __percpu_end, __percpu_start;
+#endif /* RT_USING_SMP && ARCH_MM_MMU */
+
 void *rt_hw_mmu_tbl_get(void);
 int rt_hw_mmu_map_init(rt_aspace_t aspace, void *v_address, rt_ubase_t size,
                        rt_ubase_t *vtable, rt_ubase_t pv_off);
@@ -72,4 +80,5 @@ int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size,
 void *rt_hw_mmu_pgtbl_create(void);
 void rt_hw_mmu_pgtbl_delete(void *pgtbl);
 
+unsigned long get_free_page(void);
 #endif

+ 106 - 0
libcpu/risc-v/common64/startup_gcc.S

@@ -32,9 +32,30 @@ _start:
 1:
     /* save hartid */
     la t0, boot_hartid                /* global varible rt_boot_hartid */
+#ifdef RT_USING_SMP
+    lw   t2, (t0)
+    li   t3, 0xdeadbeef               /* Sentinel value indicating uninitialized boot_hartid */
+    li   t4, 0xffffffff
+    and  t2, t2, t4                   /* Extract the lower 32 bits. */
+    bne  t2, t3, system_init          /* If boot_hartid is no longer the sentinel (already set), skip the assignment. */
+#endif
     mv t1, a0                         /* get hartid in S-mode frome a0 register */
     sw t1, (t0)                       /* store t1 register low 4 bits in memory address which is stored in t0 */
 
+#ifdef RT_USING_SMP
+system_init:
+#endif
+
+    /*
+     * When ARCH_MM_MMU is not enabled or pvoff==0:
+     * Store hartid temporarily in the satp register because:
+     * 1. satp is not used for address translation when MMU is disabled or pvoff==0.
+     * 2. This value will be moved to percpu_hartid once MMU is initialized.
+     * This approach avoids using extra memory or registers during the critical boot phase,
+     * but developers should be aware that satp is overloaded for this purpose until MMU setup.
+     */
+    csrw satp, a0
+
     /* clear Interrupt Registers */
     csrw sie, 0
     csrw sip, 0
@@ -51,7 +72,10 @@ _start:
     li x7, 0
     li x8, 0
     li x9, 0
+#ifndef RT_USING_SMP
+    /* In the SMP architecture, a0 will be used again later */
     li x10,0
+#endif
     li x11,0
     li x12,0
     li x13,0
@@ -85,17 +109,42 @@ _start:
     la gp, __global_pointer$
 .option pop
 
+#ifndef RT_USING_SMP
     /* removed SMP support here */
     la   sp, __stack_start__
     li   t0, __STACKSIZE__
     add  sp, sp, t0
+#else
+    /* Initialize the sp pointer according to different hartids. */
+    mv   t0, a0
+    /* calculate stack offset: hartid * __STACKSIZE__ */
+    li   t1, __STACKSIZE__
+    mul  t0, t0, t1        /* t0 = hartid * __STACKSIZE__ */
+
+    /* set stack pointer */
+    la   sp, __stack_start__
+    add  sp, sp, t0        /* sp = __stack_start__ + hartid * __STACKSIZE__ */
+    add  sp, sp, t1        /* sp += __STACKSIZE__ (point to stack top) */
+
+    mv      t0, a0
+    lw      t1, boot_hartid
+    mv      tp, a0
+    bne     t0, t1, early_secondary_cpu_entry
+#endif /* RT_USING_SMP */
 
     /**
      * sscratch is always zero on kernel mode
      */
     csrw sscratch, zero
     call init_bss
+early_secondary_cpu_entry:
 #ifdef ARCH_MM_MMU
+    // Manually manage pages in the early stage
+    la      a0, .early_page_array
+    call    set_free_page
+
+    la      a0, .early_tbl_page
+    mv      a1, tp
     call    rt_hw_mem_setup_early
     call    rt_kmem_pvoff
     /* a0 := pvoff  */
@@ -106,13 +155,37 @@ _start:
     sub     x1, x1, a0
     ret
 _after_pc_relocation:
+#if defined(RT_USING_SMP)
+    /* If the MMU is enabled, save the hartid in percpu_hartid.
+     * -> .percpu_hartid (hartid_0)
+     * ...... align(2MB)
+     * -> (hartid_1)
+     * ......
+    */
+    la      a0, .percpu_hartid
+    mv      a1, tp
+    call    rt_hw_percpu_hartid_init
+#endif
     /* relocate gp */
     sub     gp, gp, a0
 
+#ifndef RT_USING_SMP
     /* relocate context: sp */
     la      sp, __stack_start__
     li      t0, __STACKSIZE__
     add     sp, sp, t0
+#else
+    /* Initialize the sp pointer according to different hartids. */
+    mv   t0, tp
+    /* calculate stack offset: hartid * __STACKSIZE__ */
+    li   t1, __STACKSIZE__
+    mul  t0, t0, t1        /* t0 = hartid * __STACKSIZE__ */
+
+    /* set stack pointer */
+    la   sp, __stack_start__
+    add  sp, sp, t0        /* sp = __stack_start__ + hartid * __STACKSIZE__ */
+    add  sp, sp, t1        /* sp += __STACKSIZE__ (point to stack top) */
+#endif /* RT_USING_SMP */
 
     /* reset s0-fp */
     mv      s0, zero
@@ -121,7 +194,12 @@ _after_pc_relocation:
     la      t0, trap_entry
     csrw    stvec, t0
 1:
+#ifdef RT_USING_SMP
+    mv      t0, tp
+    lw      t1, boot_hartid
+    bne     t0, t1, secondary_cpu_entry
 #endif
+#endif /* ARCH_MM_MMU */
     call    sbi_init
     call    primary_cpu_entry
 
@@ -131,3 +209,31 @@ _never_return_here:
 .global _start_link_addr
 _start_link_addr:
     .dword __text_start
+
+#ifdef ARCH_MM_MMU
+#ifdef RT_USING_SMP
+/*
+ * CPU stack builtin
+ */
+    .section ".percpu"
+.percpu_hartid:
+    .space 16
+#endif
+
+    .section ".bss"
+
+.equ page_size, 4096
+    .balign page_size
+.early_tbl_page:
+    .space 1 * page_size
+#if defined(RT_USING_SMP) && RT_CPUS_NR > 1
+    .space (RT_CPUS_NR - 1) * page_size
+#endif
+
+.early_page_array:
+    .space (8 + 8) * page_size
+#ifdef RT_USING_SMP
+    .space RT_CPUS_NR * 5 * page_size
+#endif
+
+#endif /* ARCH_MM_MMU */

+ 91 - 73
libcpu/risc-v/common64/trap.c

@@ -76,44 +76,44 @@ void dump_regs(struct rt_hw_stack_frame *regs)
     rt_kprintf("\tCurrent Page Table(Physical) = %p\n",
                __MASKVALUE(satp_v, __MASK(44)) << PAGE_OFFSET_BIT);
     rt_kprintf("\tCurrent ASID = %p\n", __MASKVALUE(satp_v >> 44, __MASK(16))
-                                              << PAGE_OFFSET_BIT);
+                                            << PAGE_OFFSET_BIT);
     const char *mode_str = "Unknown Address Translation/Protection Mode";
 
     switch (__MASKVALUE(satp_v >> 60, __MASK(4)))
     {
-        case 0:
-            mode_str = "No Address Translation/Protection Mode";
-            break;
+    case 0:
+        mode_str = "No Address Translation/Protection Mode";
+        break;
 
-        case 8:
-            mode_str = "Page-based 39-bit Virtual Addressing Mode";
-            break;
+    case 8:
+        mode_str = "Page-based 39-bit Virtual Addressing Mode";
+        break;
 
-        case 9:
-            mode_str = "Page-based 48-bit Virtual Addressing Mode";
-            break;
+    case 9:
+        mode_str = "Page-based 48-bit Virtual Addressing Mode";
+        break;
     }
 
     rt_kprintf("\tMode = %s\n", mode_str);
     rt_kprintf("-----------------Dump OK---------------------\n");
 }
 
-static const char *Exception_Name[] = {"Instruction Address Misaligned",
-                                       "Instruction Access Fault",
-                                       "Illegal Instruction",
-                                       "Breakpoint",
-                                       "Load Address Misaligned",
-                                       "Load Access Fault",
-                                       "Store/AMO Address Misaligned",
-                                       "Store/AMO Access Fault",
-                                       "Environment call from U-mode",
-                                       "Environment call from S-mode",
-                                       "Reserved-10",
-                                       "Reserved-11",
-                                       "Instruction Page Fault",
-                                       "Load Page Fault",
-                                       "Reserved-14",
-                                       "Store/AMO Page Fault"};
+static const char *Exception_Name[] = { "Instruction Address Misaligned",
+                                        "Instruction Access Fault",
+                                        "Illegal Instruction",
+                                        "Breakpoint",
+                                        "Load Address Misaligned",
+                                        "Load Access Fault",
+                                        "Store/AMO Address Misaligned",
+                                        "Store/AMO Access Fault",
+                                        "Environment call from U-mode",
+                                        "Environment call from S-mode",
+                                        "Reserved-10",
+                                        "Reserved-11",
+                                        "Instruction Page Fault",
+                                        "Load Page Fault",
+                                        "Reserved-14",
+                                        "Store/AMO Page Fault" };
 
 static const char *Interrupt_Name[] = {
     "User Software Interrupt",
@@ -135,7 +135,16 @@ static volatile int nested = 0;
 #define ENTER_TRAP nested += 1
 #define EXIT_TRAP  nested -= 1
 #define CHECK_NESTED_PANIC(cause, tval, epc, eframe) \
-    if (nested != 1) handle_nested_trap_panic(cause, tval, epc, eframe)
+    if (nested != 1)                                 \
+    handle_nested_trap_panic(cause, tval, epc, eframe)
+#else
+/* Add trap nesting detection under the SMP architecture. */
+static volatile int nested[RT_CPUS_NR] = { 0 };
+#define ENTER_TRAP nested[rt_hw_cpu_id()] += 1
+#define EXIT_TRAP  nested[rt_hw_cpu_id()] -= 1
+#define CHECK_NESTED_PANIC(cause, tval, epc, eframe) \
+    if (nested[rt_hw_cpu_id()] != 1)                 \
+    handle_nested_trap_panic(cause, tval, epc, eframe)
 #endif /* RT_USING_SMP */
 
 static const char *get_exception_msg(int id)
@@ -165,44 +174,44 @@ void handle_user(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc,
     enum rt_mm_fault_type fault_type;
     switch (id)
     {
-        case EP_LOAD_PAGE_FAULT:
-            fault_op = MM_FAULT_OP_READ;
-            fault_type = MM_FAULT_TYPE_GENERIC_MMU;
-            break;
-        case EP_LOAD_ACCESS_FAULT:
-            fault_op = MM_FAULT_OP_READ;
-            fault_type = MM_FAULT_TYPE_BUS_ERROR;
-            break;
-        case EP_LOAD_ADDRESS_MISALIGNED:
-            fault_op = MM_FAULT_OP_READ;
-            fault_type = MM_FAULT_TYPE_BUS_ERROR;
-            break;
-        case EP_STORE_PAGE_FAULT:
-            fault_op = MM_FAULT_OP_WRITE;
-            fault_type = MM_FAULT_TYPE_GENERIC_MMU;
-            break;
-        case EP_STORE_ACCESS_FAULT:
-            fault_op = MM_FAULT_OP_WRITE;
-            fault_type = MM_FAULT_TYPE_BUS_ERROR;
-            break;
-        case EP_STORE_ADDRESS_MISALIGNED:
-            fault_op = MM_FAULT_OP_WRITE;
-            fault_type = MM_FAULT_TYPE_BUS_ERROR;
-            break;
-        case EP_INSTRUCTION_PAGE_FAULT:
-            fault_op = MM_FAULT_OP_EXECUTE;
-            fault_type = MM_FAULT_TYPE_GENERIC_MMU;
-            break;
-        case EP_INSTRUCTION_ACCESS_FAULT:
-            fault_op = MM_FAULT_OP_EXECUTE;
-            fault_type = MM_FAULT_TYPE_BUS_ERROR;
-            break;
-        case EP_INSTRUCTION_ADDRESS_MISALIGNED:
-            fault_op = MM_FAULT_OP_EXECUTE;
-            fault_type = MM_FAULT_TYPE_BUS_ERROR;
-            break;
-        default:
-            fault_op = 0;
+    case EP_LOAD_PAGE_FAULT:
+        fault_op = MM_FAULT_OP_READ;
+        fault_type = MM_FAULT_TYPE_GENERIC_MMU;
+        break;
+    case EP_LOAD_ACCESS_FAULT:
+        fault_op = MM_FAULT_OP_READ;
+        fault_type = MM_FAULT_TYPE_BUS_ERROR;
+        break;
+    case EP_LOAD_ADDRESS_MISALIGNED:
+        fault_op = MM_FAULT_OP_READ;
+        fault_type = MM_FAULT_TYPE_BUS_ERROR;
+        break;
+    case EP_STORE_PAGE_FAULT:
+        fault_op = MM_FAULT_OP_WRITE;
+        fault_type = MM_FAULT_TYPE_GENERIC_MMU;
+        break;
+    case EP_STORE_ACCESS_FAULT:
+        fault_op = MM_FAULT_OP_WRITE;
+        fault_type = MM_FAULT_TYPE_BUS_ERROR;
+        break;
+    case EP_STORE_ADDRESS_MISALIGNED:
+        fault_op = MM_FAULT_OP_WRITE;
+        fault_type = MM_FAULT_TYPE_BUS_ERROR;
+        break;
+    case EP_INSTRUCTION_PAGE_FAULT:
+        fault_op = MM_FAULT_OP_EXECUTE;
+        fault_type = MM_FAULT_TYPE_GENERIC_MMU;
+        break;
+    case EP_INSTRUCTION_ACCESS_FAULT:
+        fault_op = MM_FAULT_OP_EXECUTE;
+        fault_type = MM_FAULT_TYPE_BUS_ERROR;
+        break;
+    case EP_INSTRUCTION_ADDRESS_MISALIGNED:
+        fault_op = MM_FAULT_OP_EXECUTE;
+        fault_type = MM_FAULT_TYPE_BUS_ERROR;
+        break;
+    default:
+        fault_op = 0;
     }
 
     if (fault_op)
@@ -228,7 +237,7 @@ void handle_user(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc,
     dump_regs(sp);
 
     rt_thread_t cur_thr = rt_thread_self();
-    struct rt_hw_backtrace_frame frame = {.fp = sp->s0_fp, .pc = sepc};
+    struct rt_hw_backtrace_frame frame = { .fp = sp->s0_fp, .pc = sepc };
     rt_kprintf("fp = %p\n", frame.fp);
     lwp_backtrace_frame(cur_thr, &frame);
 
@@ -260,12 +269,12 @@ static int illegal_inst_recoverable(rt_ubase_t stval,
 
     switch (opcode)
     {
-        case 0x57: // V
-        case 0x27: // scalar FLOAT
-        case 0x07:
-        case 0x73: // CSR
-            flag = 1;
-            break;
+    case 0x57: // V
+    case 0x27: // scalar FLOAT
+    case 0x07:
+    case 0x73: // CSR
+        flag = 1;
+        break;
     }
 
     if (flag)
@@ -314,6 +323,15 @@ void handle_trap(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc,
         tick_isr();
         rt_interrupt_leave();
     }
+#ifdef RT_USING_SMP
+    else if ((SCAUSE_INTERRUPT | SCAUSE_S_SOFTWARE_INTR) == scause)
+    {
+        /* supervisor software interrupt for ipi */
+        rt_interrupt_enter();
+        rt_hw_ipi_handler();
+        rt_interrupt_leave();
+    }
+#endif /* RT_USING_SMP */
     else
     {
         if (SCAUSE_INTERRUPT & scause)
@@ -364,7 +382,7 @@ void handle_trap(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc,
         rt_kprintf("current thread: %s\n", cur_thr->parent.name);
 
         rt_kprintf("--------------Backtrace--------------\n");
-        struct rt_hw_backtrace_frame frame = {.fp = sp->s0_fp, .pc = sepc};
+        struct rt_hw_backtrace_frame frame = { .fp = sp->s0_fp, .pc = sepc };
 
 #ifdef RT_USING_SMART
         if (!(sp->sstatus & 0x100))

+ 146 - 2
libcpu/risc-v/virt64/interrupt.c

@@ -17,6 +17,12 @@
 
 struct rt_irq_desc irq_desc[MAX_HANDLERS];
 
+#ifdef RT_USING_SMP
+#include "sbi.h"
+struct rt_irq_desc ipi_desc[RT_MAX_IPI];
+uint8_t ipi_vectors[RT_CPUS_NR] = { 0 };
+#endif /* RT_USING_SMP */
+
 static rt_isr_handler_t rt_hw_interrupt_handle(rt_uint32_t vector, void *param)
 {
     rt_kprintf("UN-handled interrupt %d occurred!!!\n", vector);
@@ -53,11 +59,11 @@ void rt_hw_interrupt_umask(int vector)
  * @param old_handler the old interrupt service routine
  */
 rt_isr_handler_t rt_hw_interrupt_install(int vector, rt_isr_handler_t handler,
-        void *param, const char *name)
+                                         void *param, const char *name)
 {
     rt_isr_handler_t old_handler = RT_NULL;
 
-    if(vector < MAX_HANDLERS)
+    if (vector < MAX_HANDLERS)
     {
         old_handler = irq_desc[vector].handler;
         if (handler != RT_NULL)
@@ -92,3 +98,141 @@ void rt_hw_interrupt_init()
 
     plic_set_threshold(0);
 }
+
+#ifdef RT_USING_SMP
+void rt_hw_interrupt_set_priority(int vector, unsigned int priority)
+{
+    plic_set_priority(vector, priority);
+}
+
+unsigned int rt_hw_interrupt_get_priority(int vector)
+{
+    return (*(uint32_t *)PLIC_PRIORITY(vector));
+}
+
+rt_bool_t rt_hw_interrupt_is_disabled(void)
+{
+    /* Determine the interrupt enable state */
+    rt_ubase_t sstatus;
+    __asm__ volatile("csrr %0, sstatus" : "=r"(sstatus));
+    return (sstatus & SSTATUS_SIE) == 0;
+}
+
+void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
+{
+    _lock->slock = 0;
+}
+
+void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
+{
+    /* Use ticket lock implemented on top of the 32/64-bit atomic AMO ops.
+     * The combined word layout (slock) maps two uint16_t fields:
+     *   low 16 bits: owner
+     *   high 16 bits: next (ticket allocator)
+     * We atomically increment the "next" field by (1 << 16) and use the
+     * returned old value to compute our ticket. Then wait until owner == ticket.
+     */
+    rt_atomic_t prev;
+    rt_atomic_t ticket;
+    rt_atomic_t owner;
+
+    /* Allocate a ticket by adding (1 << 16) to slock, prev holds previous value */
+    prev = rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)(1UL << 16));
+    ticket = (prev >> 16) & 0xffffUL;
+
+    /* Wait until owner equals our ticket */
+    for (;;)
+    {
+        owner = rt_hw_atomic_load((volatile rt_atomic_t *)&lock->slock) & 0xffffUL;
+        if (owner == ticket)
+            break;
+        /* TODO: low-power wait for interrupt while spinning */
+    }
+
+    /* Ensure all following memory accesses are ordered after acquiring the lock */
+    __asm__ volatile("fence rw, rw" ::: "memory");
+}
+
+void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
+{
+    /* Ensure memory operations before unlock are visible before owner increment */
+    __asm__ volatile("fence rw, rw" ::: "memory");
+
+    /* Increment owner (low 16 bits) to hand over lock to next ticket.
+     * Use an atomic load of the combined slock word and compare the low
+     * 16-bit owner field. If owner would overflow (0xffff), clear the owner field
+     * atomically by ANDing with 0xffff0000; otherwise increment owner by 1.
+     */
+    if ((rt_hw_atomic_load((volatile rt_atomic_t *)&lock->slock) & (rt_atomic_t)0xffffUL) == (rt_atomic_t)0xffffUL)
+    {
+        /* Atomic clear owner (low 16 bits) when it overflows. Keep next ticket field. */
+        rt_hw_atomic_and((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)0xffff0000UL);
+    }
+    else
+    {
+        rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)1);
+    }
+
+    // TODO: IPI interrupt to wake up other harts waiting for the lock
+
+    /* Make the increment visible to other harts */
+    __asm__ volatile("fence rw, rw" ::: "memory");
+}
+
+void rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask)
+{
+    int cpuid = __builtin_ctz(cpu_mask); // get the bit position of the lowest set bit
+    ipi_vectors[cpuid] |= (uint8_t)ipi_vector;
+    sbi_send_ipi((const unsigned long *)&cpu_mask);
+}
+
+void rt_hw_ipi_init(void)
+{
+    int idx = 0, cpuid = rt_cpu_get_id();
+    ipi_vectors[cpuid] = 0;
+    /* init exceptions table */
+    for (idx = 0; idx < RT_MAX_IPI; idx++)
+    {
+        ipi_desc[idx].handler = RT_NULL;
+        ipi_desc[idx].param = RT_NULL;
+#ifdef RT_USING_INTERRUPT_INFO
+        rt_snprintf(ipi_desc[idx].name, RT_NAME_MAX - 1, "default");
+        ipi_desc[idx].counter = 0;
+#endif
+    }
+    set_csr(sie, SIP_SSIP);
+}
+
+void rt_hw_ipi_handler_install(int ipi_vector, rt_isr_handler_t ipi_isr_handler)
+{
+    if (ipi_vector < RT_MAX_IPI)
+    {
+        if (ipi_isr_handler != RT_NULL)
+        {
+            ipi_desc[ipi_vector].handler = (rt_isr_handler_t)ipi_isr_handler;
+            ipi_desc[ipi_vector].param = RT_NULL;
+        }
+    }
+}
+
+void rt_hw_ipi_handler(void)
+{
+    rt_uint32_t ipi_vector;
+
+    ipi_vector = ipi_vectors[rt_cpu_get_id()];
+    while (ipi_vector)
+    {
+        int bitpos = __builtin_ctz(ipi_vector);
+        ipi_vector &= ~(1 << bitpos);
+        if (bitpos < RT_MAX_IPI && ipi_desc[bitpos].handler != RT_NULL)
+        {
+            rt_hw_atomic_and((volatile rt_atomic_t *)&ipi_vectors[rt_cpu_get_id()], ~((rt_atomic_t)(1 << bitpos)));
+            /* call the irq service routine */
+            ipi_desc[bitpos].handler(bitpos, ipi_desc[bitpos].param);
+        }
+    }
+
+    // TODO: Clear the software interrupt pending bit in CLINT
+    clear_csr(sip, SIP_SSIP);
+}
+#endif /* RT_USING_SMP */

+ 8 - 1
libcpu/risc-v/virt64/interrupt.h

@@ -42,5 +42,12 @@ void rt_hw_interrupt_init(void);
 void rt_hw_interrupt_mask(int vector);
 rt_isr_handler_t rt_hw_interrupt_install(int vector, rt_isr_handler_t handler, void *param, const char *name);
 void handle_trap(rt_ubase_t xcause, rt_ubase_t xtval, rt_ubase_t xepc, struct rt_hw_stack_frame *sp);
-
+#ifdef RT_USING_SMP
+void rt_hw_interrupt_set_priority(int vector, unsigned int priority);
+unsigned int rt_hw_interrupt_get_priority(int vector);
+void rt_hw_ipi_handler(void);
+void rt_hw_ipi_handler_install(int ipi_vector, rt_isr_handler_t ipi_isr_handler);
+void rt_hw_ipi_init(void);
+void rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask);
+#endif /* RT_USING_SMP */
 #endif
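
A typical consumer of the new IPI hooks is the kernel's cross-core reschedule path. A minimal usage sketch, assuming the standard RT_SCHEDULE_IPI vector and rt_scheduler_ipi_handler from the RT-Thread kernel (neither is part of this patch):

    /* Usage sketch only -- the kernel wires this up itself when RT_USING_SMP is enabled. */
    #include <rthw.h>
    #include <rtthread.h>

    void ipi_usage_demo(void)
    {
        /* Route the schedule IPI to the kernel handler on this hart. */
        rt_hw_ipi_handler_install(RT_SCHEDULE_IPI, rt_scheduler_ipi_handler);

        /* Ask hart 1 to run its scheduler; cpu_mask is a bit mask of target harts. */
        rt_hw_ipi_send(RT_SCHEDULE_IPI, 1u << 1);
    }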