فهرست منبع

Merge branch 'feature/esp32p4_fpu_support' into 'master'

feat(riscv): implement FPU support for RISC-V targets

Closes IDF-7770

See merge request espressif/esp-idf!25871
Omar Chebib 2 سال پیش
والد
کامیت
51434a8367

+ 21 - 15
components/esp_system/port/arch/riscv/panic_arch.c

@@ -259,11 +259,28 @@ static inline void print_memprot_err_details(const void *frame __attribute__((un
 }
 #endif
 
+static void panic_print_register_array(const char* names[], const uint32_t* regs, int size)
+{
+    const int regs_per_line = 4;
+    for (int i = 0; i < size; i++) {
+        if (i % regs_per_line == 0) {
+            panic_print_str("\r\n");
+        }
+        panic_print_str(names[i]);
+        panic_print_str(": 0x");
+        panic_print_hex(regs[i]);
+        panic_print_str("  ");
+    }
+}
+
+
 void panic_print_registers(const void *f, int core)
 {
-    uint32_t *regs = (uint32_t *)f;
+    const RvExcFrame *frame = (RvExcFrame *)f;
 
-    // only print ABI name
+    /**
+     * General Purpose context, only print ABI name
+     */
     const char *desc[] = {
         "MEPC    ", "RA      ", "SP      ", "GP      ", "TP      ", "T0      ", "T1      ", "T2      ",
         "S0/FP   ", "S1      ", "A0      ", "A1      ", "A2      ", "A3      ", "A4      ", "A5      ",
@@ -273,20 +290,9 @@ void panic_print_registers(const void *f, int core)
     };
 
     panic_print_str("Core ");
-    panic_print_dec(((RvExcFrame *)f)->mhartid);
+    panic_print_dec(frame->mhartid);
     panic_print_str(" register dump:");
-
-    for (int x = 0; x < sizeof(desc) / sizeof(desc[0]); x += 4) {
-        panic_print_str("\r\n");
-        for (int y = 0; y < 4 && x + y < sizeof(desc) / sizeof(desc[0]); y++) {
-            if (desc[x + y][0] != 0) {
-                panic_print_str(desc[x + y]);
-                panic_print_str(": 0x");
-                panic_print_hex(regs[x + y]);
-                panic_print_str("  ");
-            }
-        }
-    }
+    panic_print_register_array(desc, f, DIM(desc));
 }
 
 /**

+ 0 - 20
components/esp_system/port/cpu_start.c

@@ -191,16 +191,6 @@ void IRAM_ATTR call_start_cpu1(void)
     );
 #endif  //#ifdef __riscv
 
-#if CONFIG_IDF_TARGET_ESP32P4
-    //TODO: IDF-7770
-    //set mstatus.fs=2'b01, floating-point unit in the initialization state
-    asm volatile(
-        "li t0, 0x2000\n"
-        "csrrs t0, mstatus, t0\n"
-        :::"t0"
-    );
-#endif  //#if CONFIG_IDF_TARGET_ESP32P4
-
 #if SOC_BRANCH_PREDICTOR_SUPPORTED
     esp_cpu_branch_prediction_enable();
 #endif  //#if SOC_BRANCH_PREDICTOR_SUPPORTED
@@ -387,16 +377,6 @@ void IRAM_ATTR call_start_cpu0(void)
     );
 #endif
 
-#if CONFIG_IDF_TARGET_ESP32P4
-    //TODO: IDF-7770
-    //set mstatus.fs=2'b01, floating-point unit in the initialization state
-    asm volatile(
-        "li t0, 0x2000\n"
-        "csrrs t0, mstatus, t0\n"
-        :::"t0"
-    );
-#endif  //#if CONFIG_IDF_TARGET_ESP32P4
-
 #if SOC_BRANCH_PREDICTOR_SUPPORTED
     esp_cpu_branch_prediction_enable();
 #endif

+ 10 - 1
components/freertos/FreeRTOS-Kernel-SMP/portable/riscv/portasm.S

@@ -71,11 +71,19 @@ rtos_enter_end:
     ret
 
 /**
- * Restores the context of the next task.
+ * @brief Restore the stack pointer of the next task to run.
+ *
+ * @param a0 Former mstatus
+ *
+ * @returns New mstatus
  */
     .global rtos_int_exit
     .type rtos_int_exit, @function
 rtos_int_exit:
+    /* To speed up this routine and because this current routine is only meant to be called from the interrupt
+     * handler, let's use callee-saved registers instead of stack space. Registers `s3-s11` are not used by
+     * the caller */
+    mv s11, a0
     /* may skip RTOS aware interrupt since scheduler was not started */
     lw t0, uxSchedulerRunning
     beq t0,zero, rtos_exit_end
@@ -137,4 +145,5 @@ no_switch:
 #endif /* CONFIG_ESP_SYSTEM_HW_STACK_GUARD */
 
 rtos_exit_end:
+    mv a0, s11                         /* a0 = new mstatus */
     ret

+ 198 - 1
components/freertos/FreeRTOS-Kernel/portable/riscv/port.c

@@ -59,6 +59,18 @@
 #include "soc/hp_system_reg.h"
 #endif
 
+#if ( SOC_CPU_COPROC_NUM > 0 )
+
+#include "esp_private/panic_internal.h"
+
+/* Since `portFORCE_INLINE` is not defined in `portmacro.h`, we must define it here since it is
+ * used by `atomic.h`. */
+#define portFORCE_INLINE    inline
+#include "freertos/atomic.h"
+
+#endif // ( SOC_CPU_COPROC_NUM > 0 )
+
+
 _Static_assert(portBYTE_ALIGNMENT == 16, "portBYTE_ALIGNMENT must be set to 16");
 #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
 /**
@@ -82,6 +94,13 @@ volatile UBaseType_t port_uxCriticalNesting[portNUM_PROCESSORS] = {0};
 volatile UBaseType_t port_uxOldInterruptState[portNUM_PROCESSORS] = {0};
 volatile UBaseType_t xPortSwitchFlag[portNUM_PROCESSORS] = {0};
 
+#if ( SOC_CPU_COPROC_NUM > 0 )
+
+/* Current owner of the coprocessors for each core */
+StaticTask_t* port_uxCoprocOwner[portNUM_PROCESSORS][SOC_CPU_COPROC_NUM];
+
+#endif /* SOC_CPU_COPROC_NUM > 0 */
+
 /*
 *******************************************************************************
 * Interrupt stack. The size of the interrupt stack is determined by the config
@@ -104,6 +123,10 @@ StackType_t *xIsrStackBottom[portNUM_PROCESSORS] = {0};
 
 BaseType_t xPortStartScheduler(void)
 {
+#if ( SOC_CPU_COPROC_NUM > 0 )
+    /* Disable FPU so that the first task to use it will trigger an exception */
+    rv_utils_disable_fpu();
+#endif
     /* Initialize all kernel state tracking variables */
     BaseType_t coreID = xPortGetCoreID();
     port_uxInterruptNesting[coreID] = 0;
@@ -238,6 +261,58 @@ static void vPortTaskWrapper(TaskFunction_t pxCode, void *pvParameters)
 }
 #endif // CONFIG_FREERTOS_TASK_FUNCTION_WRAPPER
 
+
+#if ( SOC_CPU_COPROC_NUM > 0 )
+
+/**
+ * @brief Retrieve or allocate coprocessors save area from the given pxTopOfStack address.
+ *
+ * @param pxTopOfStack End of the stack address. This represents the highest address of a Task's stack.
+ */
+FORCE_INLINE_ATTR RvCoprocSaveArea* pxRetrieveCoprocSaveAreaFromStackPointer(UBaseType_t pxTopOfStack)
+{
+    return (RvCoprocSaveArea*) STACKPTR_ALIGN_DOWN(16, pxTopOfStack - sizeof(RvCoprocSaveArea));
+}
+
+/**
+ * @brief Allocate and initialize the coprocessors save area on the stack
+ *
+ * @param[in] uxStackPointer Current stack pointer address
+ *
+ * @return Stack pointer that points to allocated and initialized the coprocessor save area
+ */
+FORCE_INLINE_ATTR UBaseType_t uxInitialiseCoprocSaveArea(UBaseType_t uxStackPointer)
+{
+    RvCoprocSaveArea* sa = pxRetrieveCoprocSaveAreaFromStackPointer(uxStackPointer);
+    memset(sa, 0, sizeof(RvCoprocSaveArea));
+    return (UBaseType_t) sa;
+}
+
+
+static void vPortCleanUpCoprocArea(void *pvTCB)
+{
+    StaticTask_t* task = (StaticTask_t*) pvTCB;
+
+    /* Get a pointer to the task's coprocessor save area */
+    const UBaseType_t bottomstack = (UBaseType_t) task->pxDummy8;
+    RvCoprocSaveArea* sa = pxRetrieveCoprocSaveAreaFromStackPointer(bottomstack);
+
+    /* If the Task used any coprocessor, check if it is the actual owner of any.
+     * If yes, reset the owner. */
+    if (sa->sa_enable != 0) {
+        /* Get the core the task is pinned on */
+        const BaseType_t coreID = task->xDummyCoreID;
+
+        for (int i = 0; i < SOC_CPU_COPROC_NUM; i++) {
+            StaticTask_t** owner = &port_uxCoprocOwner[coreID][i];
+            /* If the owner is `task`, replace it with NULL atomically */
+            Atomic_CompareAndSwapPointers_p32((void**) owner, NULL, task);
+        }
+    }
+}
+#endif /* SOC_CPU_COPROC_NUM > 0 */
+
+
 /**
  * @brief Initialize the task's starting interrupt stack frame
  *
@@ -304,12 +379,38 @@ StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxC
 
     - All stack areas are aligned to 16 byte boundary
     - We use UBaseType_t for all of stack area initialization functions for more convenient pointer arithmetic
+
+    In the case of targets that have coprocessors, the stack is presented as follows:
+    HIGH ADDRESS
+    |---------------------------| <- pxTopOfStack on entry
+    | Coproc. Save Area         | <- RvCoprocSaveArea
+    | ------------------------- |
+    | TLS Variables             |
+    | ------------------------- | <- Start of usable stack
+    | Starting stack frame      |
+    | ------------------------- | <- pxTopOfStack on return (which is the tasks current SP)
+    |             |             |
+    |             |             |
+    |             V             |
+    |---------------------------|
+    | Coproc. m Saved Context   | <- Coprocessor context save area after allocation
+    |---------------------------|
+    | Coproc. n Saved Context   | <- Another coprocessor context save area after allocation
+    ----------------------------- <- Bottom of stack
+    LOW ADDRESS
+
+    Where m != n, n < SOC_CPU_COPROC_NUM, m < SOC_CPU_COPROC_NUM
+
     */
 
     UBaseType_t uxStackPointer = (UBaseType_t)pxTopOfStack;
     configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);
 
-    // IDF-7770: Support FPU context save area for P4
+#if ( SOC_CPU_COPROC_NUM > 0 )
+    // Initialize the coprocessors save area
+    uxStackPointer = uxInitialiseCoprocSaveArea(uxStackPointer);
+    configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);
+#endif // SOC_CPU_COPROC_NUM > 0
 
     // Initialize GCC TLS area
     uint32_t threadptr_reg_init;
@@ -647,8 +748,104 @@ void vPortTCBPreDeleteHook( void *pxTCB )
         /* Call TLS pointers deletion callbacks */
         vPortTLSPointersDelCb( pxTCB );
     #endif /* CONFIG_FREERTOS_TLSP_DELETION_CALLBACKS */
+
+    #if ( SOC_CPU_COPROC_NUM > 0 )
+        /* Cleanup coproc save area */
+        vPortCleanUpCoprocArea( pxTCB );
+    #endif /* SOC_CPU_COPROC_NUM > 0 */
 }
 
+
+#if ( SOC_CPU_COPROC_NUM > 0 )
+
+// ----------------------- Coprocessors --------------------------
+
+/**
+ * @brief Pin the given task to the given core
+ *
+ * This function is called when a task uses a coprocessor. Since the coprocessors registers
+ * are saved lazily, as soon as a task starts using one, it must always be scheduled on the core
+ * it is currently executing on.
+ */
+void vPortTaskPinToCore(StaticTask_t* task, int coreid)
+{
+    task->xDummyCoreID = coreid;
+}
+
+
+/**
+ * @brief Get coprocessor save area out of the given task. If the coprocessor area is not created,
+ *        it shall be allocated.
+ */
+RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, int coproc)
+{
+    const UBaseType_t bottomstack = (UBaseType_t) task->pxDummy8;
+    RvCoprocSaveArea* sa = pxRetrieveCoprocSaveAreaFromStackPointer(bottomstack);
+    /* Check if the allocator is NULL. Since we don't have a way to get the end of the stack
+     * during its initialization, we have to do this here */
+    if (sa->sa_allocator == 0) {
+        sa->sa_allocator = (UBaseType_t) task->pxDummy6;
+    }
+
+    /* Check if coprocessor area is allocated */
+    if (sa->sa_coprocs[coproc] == NULL) {
+        const uint32_t coproc_sa_sizes[] = {
+            RV_COPROC0_SIZE, RV_COPROC1_SIZE
+        };
+        /* Allocate the save area at end of the allocator */
+        UBaseType_t allocated = sa->sa_allocator + coproc_sa_sizes[coproc];
+        sa->sa_coprocs[coproc] = (void*) allocated;
+        /* Update the allocator address for next use */
+        sa->sa_allocator = allocated;
+    }
+    return sa;
+}
+
+
+/**
+ * @brief Update given coprocessor owner and get the address of former owner's save area.
+ *
+ * This function is called when the current running task has poked a coprocessor's register which
+ * was used by a previous task. We have to save the coprocessor context (registers) inside the
+ * current owner's save area and change the ownership. The coprocessor will be marked as used in
+ * the new owner's coprocessor save area.
+ *
+ * @param coreid    Current core
+ * @param coproc    Coprocessor to save context of
+ *
+ * @returns Coprocessor former owner's save area
+ */
+RvCoprocSaveArea* pxPortUpdateCoprocOwner(int coreid, int coproc, StaticTask_t* owner)
+{
+    RvCoprocSaveArea* sa = NULL;
+    /* Address of coprocessor owner */
+    StaticTask_t** owner_addr = &port_uxCoprocOwner[ coreid ][ coproc ];
+    /* Atomically exchange former owner with the new one */
+    StaticTask_t* former = Atomic_SwapPointers_p32((void**) owner_addr, owner);
+    /* Get the save area of former owner */
+    if (former != NULL) {
+        sa = pxPortGetCoprocArea(former, coproc);
+    }
+    return sa;
+}
+
+
+/**
+ * @brief Aborts execution when a coprocessor was used in an ISR context
+ */
+void vPortCoprocUsedInISR(void* frame)
+{
+    extern void xt_unhandled_exception(void*);
+    /* Since this function is called from an exception handler, the interrupts are disabled,
+     * as such, it is not possible to trigger another exception as would `abort` do.
+     * Simulate an abort without actually triggering an exception. */
+    g_panic_abort = true;
+    g_panic_abort_details = (char *) "ERROR: Coprocessors must not be used in ISRs!\n";
+    xt_unhandled_exception(frame);
+}
+
+#endif /* SOC_CPU_COPROC_NUM > 0 */
+
 /* ---------------------------------------------- Misc Implementations -------------------------------------------------
  *
  * ------------------------------------------------------------------------------------------------------------------ */

+ 251 - 16
components/freertos/FreeRTOS-Kernel/portable/riscv/portasm.S

@@ -7,8 +7,9 @@
 #include "portmacro.h"
 #include "freertos/FreeRTOSConfig.h"
 #include "soc/soc_caps.h"
+#include "riscv/rvruntime-frames.h"
 
-.extern pxCurrentTCBs
+    .extern pxCurrentTCBs
 
 #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
 #include "esp_private/hw_stack_guard.h"
@@ -22,8 +23,6 @@
     .global xPortSwitchFlag
 #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
     .global xIsrStackBottom
-    .global port_offset_pxStack
-    .global port_offset_pxEndOfStack
     .global esp_hw_stack_guard_monitor_stop
     .global esp_hw_stack_guard_monitor_start
     .global esp_hw_stack_guard_set_bounds
@@ -31,6 +30,210 @@
 
     .section .text
 
+
+#if SOC_CPU_COPROC_NUM > 0
+
+#if SOC_CPU_HAS_FPU
+
+/* Bit to set in mstatus to enable the FPU */
+#define CSR_MSTATUS_FPU_ENABLE      (1 << 13)
+/* Bit to clear in mstatus to disable the FPU */
+#define CSR_MSTATUS_FPU_DISABLE     (3 << 13)
+
+.macro save_fpu_regs frame=sp
+    fsw     ft0,  RV_FPU_FT0(\frame)
+    fsw     ft1,  RV_FPU_FT1(\frame)
+    fsw     ft2,  RV_FPU_FT2(\frame)
+    fsw     ft3,  RV_FPU_FT3(\frame)
+    fsw     ft4,  RV_FPU_FT4(\frame)
+    fsw     ft5,  RV_FPU_FT5(\frame)
+    fsw     ft6,  RV_FPU_FT6(\frame)
+    fsw     ft7,  RV_FPU_FT7(\frame)
+    fsw     fs0,  RV_FPU_FS0(\frame)
+    fsw     fs1,  RV_FPU_FS1(\frame)
+    fsw     fa0,  RV_FPU_FA0(\frame)
+    fsw     fa1,  RV_FPU_FA1(\frame)
+    fsw     fa2,  RV_FPU_FA2(\frame)
+    fsw     fa3,  RV_FPU_FA3(\frame)
+    fsw     fa4,  RV_FPU_FA4(\frame)
+    fsw     fa5,  RV_FPU_FA5(\frame)
+    fsw     fa6,  RV_FPU_FA6(\frame)
+    fsw     fa7,  RV_FPU_FA7(\frame)
+    fsw     fs2,  RV_FPU_FS2(\frame)
+    fsw     fs3,  RV_FPU_FS3(\frame)
+    fsw     fs4,  RV_FPU_FS4(\frame)
+    fsw     fs5,  RV_FPU_FS5(\frame)
+    fsw     fs6,  RV_FPU_FS6(\frame)
+    fsw     fs7,  RV_FPU_FS7(\frame)
+    fsw     fs8,  RV_FPU_FS8(\frame)
+    fsw     fs9,  RV_FPU_FS9(\frame)
+    fsw     fs10, RV_FPU_FS10(\frame)
+    fsw     fs11, RV_FPU_FS11(\frame)
+    fsw     ft8,  RV_FPU_FT8 (\frame)
+    fsw     ft9,  RV_FPU_FT9 (\frame)
+    fsw     ft10, RV_FPU_FT10(\frame)
+    fsw     ft11, RV_FPU_FT11(\frame)
+.endm
+
+.macro restore_fpu_regs frame=sp
+    flw     ft0,  RV_FPU_FT0(\frame)
+    flw     ft1,  RV_FPU_FT1(\frame)
+    flw     ft2,  RV_FPU_FT2(\frame)
+    flw     ft3,  RV_FPU_FT3(\frame)
+    flw     ft4,  RV_FPU_FT4(\frame)
+    flw     ft5,  RV_FPU_FT5(\frame)
+    flw     ft6,  RV_FPU_FT6(\frame)
+    flw     ft7,  RV_FPU_FT7(\frame)
+    flw     fs0,  RV_FPU_FS0(\frame)
+    flw     fs1,  RV_FPU_FS1(\frame)
+    flw     fa0,  RV_FPU_FA0(\frame)
+    flw     fa1,  RV_FPU_FA1(\frame)
+    flw     fa2,  RV_FPU_FA2(\frame)
+    flw     fa3,  RV_FPU_FA3(\frame)
+    flw     fa4,  RV_FPU_FA4(\frame)
+    flw     fa5,  RV_FPU_FA5(\frame)
+    flw     fa6,  RV_FPU_FA6(\frame)
+    flw     fa7,  RV_FPU_FA7(\frame)
+    flw     fs2,  RV_FPU_FS2(\frame)
+    flw     fs3,  RV_FPU_FS3(\frame)
+    flw     fs4,  RV_FPU_FS4(\frame)
+    flw     fs5,  RV_FPU_FS5(\frame)
+    flw     fs6,  RV_FPU_FS6(\frame)
+    flw     fs7,  RV_FPU_FS7(\frame)
+    flw     fs8,  RV_FPU_FS8(\frame)
+    flw     fs9,  RV_FPU_FS9(\frame)
+    flw     fs10, RV_FPU_FS10(\frame)
+    flw     fs11, RV_FPU_FS11(\frame)
+    flw     ft8,  RV_FPU_FT8(\frame)
+    flw     ft9,  RV_FPU_FT9(\frame)
+    flw     ft10, RV_FPU_FT10(\frame)
+    flw     ft11, RV_FPU_FT11(\frame)
+.endm
+
+
+.macro fpu_read_dirty_bit reg
+    csrr    \reg, mstatus
+    srli    \reg, \reg, 13
+    andi    \reg, \reg, 1
+.endm
+
+
+.macro fpu_clear_dirty_bit reg
+    li      \reg, 1 << 13
+    csrc    mstatus, \reg
+.endm
+
+
+.macro fpu_enable reg
+    li      \reg, CSR_MSTATUS_FPU_ENABLE
+    csrs   mstatus, \reg
+.endm
+
+
+.macro fpu_disable reg
+    li      \reg, CSR_MSTATUS_FPU_DISABLE
+    csrc   mstatus, \reg
+.endm
+
+    .global vPortTaskPinToCore
+    .global vPortCoprocUsedInISR
+    .global pxPortUpdateCoprocOwner
+
+/**
+ * @brief Save the current FPU context in the FPU owner's save area
+ *
+ * @param sp Interruptee's RvExcFrame address
+ *
+ * Note: Since this routine is ONLY meant to be called from _panic_handler routine,
+ * it is possible to alter `s0-s11` registers
+ */
+    .global rtos_save_fpu_coproc
+    .type rtos_save_fpu_coproc, @function
+rtos_save_fpu_coproc:
+    /* If we are in an interrupt context, we have to abort. We don't allow using the FPU from ISR */
+#if ( configNUM_CORES > 1 )
+    csrr  a2, mhartid                     /* a2 = coreID */
+    slli  a2, a2, 2                       /* a2 = coreID * 4 */
+    la    a1, port_uxInterruptNesting     /* a1 = &port_uxInterruptNesting */
+    add   a1, a1, a2                      /* a1 = &port_uxInterruptNesting[coreID] */
+    lw    a1, 0(a1)                       /* a1 = port_uxInterruptNesting[coreID] */
+#else /* ( configNUM_CORES <= 1 ) */
+    lw    a1, (port_uxInterruptNesting)   /* a1 = port_uxInterruptNesting */
+#endif /* ( configNUM_CORES > 1 ) */
+    /* SP still contains the RvExcFrame address */
+    mv    a0, sp
+    bnez  a1, vPortCoprocUsedInISR
+    /* Enable the FPU needed by the current task */
+    fpu_enable a1
+    mv    s0, ra
+    call  rtos_current_tcb
+    /* If the current TCB is NULL, the FPU is used during initialization, even before
+     * the scheduler started. Consider this a valid usage, the FPU will be disabled
+     * as soon as the scheduler is started anyway. */
+    beqz  a0, rtos_save_fpu_coproc_norestore
+    mv    s1, a0                    /* s1 = pxCurrentTCBs */
+    /* Prepare parameters of pxPortUpdateCoprocOwner */
+    mv    a2, a0
+    li    a1, FPU_COPROC_IDX
+    csrr  a0, mhartid
+    call  pxPortUpdateCoprocOwner
+    /* If the save area is NULL, no need to save context */
+    beqz  a0, rtos_save_fpu_coproc_nosave
+    /* Save the FPU context in the structure */
+    lw    a0, RV_COPROC_SA+FPU_COPROC_IDX*4(a0)      /* a0 = RvCoprocSaveArea->sa_coprocs[FPU_COPROC_IDX] */
+    save_fpu_regs a0
+    csrr  a1, fcsr
+    sw    a1, RV_FPU_FCSR(a0)
+rtos_save_fpu_coproc_nosave:
+    /* Pin current task to current core */
+    mv    a0, s1
+    csrr  a1, mhartid
+    call  vPortTaskPinToCore
+    /* Check if we have to restore a previous FPU context from the current TCB */
+    mv    a0, s1
+    call  pxPortGetCoprocArea
+    /* Get the enable flags from the coprocessor save area */
+    lw    a1, RV_COPROC_ENABLE(a0)
+    /* To avoid having branches below, set the FPU enable flag now */
+    ori   a2, a1, 1 << FPU_COPROC_IDX
+    sw    a2, RV_COPROC_ENABLE(a0)
+    /* Check if the former FPU enable bit was set */
+    andi  a2, a1, 1 << FPU_COPROC_IDX
+    beqz  a2, rtos_save_fpu_coproc_norestore
+    /* FPU enable bit was set, restore the FPU context */
+    lw    a0, RV_COPROC_SA+FPU_COPROC_IDX*4(a0)      /* a0 = RvCoprocSaveArea->sa_coprocs[FPU_COPROC_IDX] */
+    restore_fpu_regs a0
+    lw    a1, RV_FPU_FCSR(a0)
+    csrw  fcsr, a1
+rtos_save_fpu_coproc_norestore:
+    /* Return from routine via s0, instead of ra */
+    jr    s0
+    .size rtos_save_fpu_coproc, .-rtos_save_fpu_coproc
+
+#endif /* SOC_CPU_HAS_FPU */
+
+#endif /* SOC_CPU_COPROC_NUM > 0 */
+
+
+/**
+ * @brief Get current TCB on current core
+ */
+    .type rtos_current_tcb, @function
+rtos_current_tcb:
+#if ( configNUM_CORES > 1 )
+    csrr    a1, mhartid
+    slli    a1, a1, 2
+    la      a0, pxCurrentTCBs               /* a0 = &pxCurrentTCBs */
+    add     a0, a0, a1                      /* a0 = &pxCurrentTCBs[coreID] */
+    lw      a0, 0(a0)                       /* a0 = pxCurrentTCBs[coreID] */
+#else
+    /* Recover the stack of next task */
+    lw      a0, pxCurrentTCBs
+#endif /* ( configNUM_CORES > 1 ) */
+    ret
+    .size, .-rtos_current_tcb
+
+
 /**
  * This function makes the RTOS aware about an ISR entering. It takes the
  * current task stack pointer and places it into the pxCurrentTCBs.
@@ -65,6 +268,13 @@ rtos_int_enter:
     /* If we reached here from another low-priority ISR, i.e, port_uxInterruptNesting[coreID] > 0, then skip stack pushing to TCB */
     bnez    a1, rtos_int_enter_end          /* if (port_uxInterruptNesting[coreID] > 0) jump to rtos_int_enter_end */
 
+#if SOC_CPU_COPROC_NUM > 0
+    /* Disable the FPU to forbid the ISR from using it. We don't need to re-enable it manually since the caller
+     * will restore `mstatus` before returning from interrupt. */
+    fpu_disable a0
+#endif /* SOC_CPU_COPROC_NUM > 0 */
+
+
 #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
     /* esp_hw_stack_guard_monitor_stop(); pass the scratch registers */
     ESP_HW_STACK_GUARD_MONITOR_STOP_CUR_CORE a0 a1
@@ -106,11 +316,19 @@ rtos_int_enter_end:
     ret
 
 /**
- * Restore the stack pointer of the next task to run.
+ * @brief Restore the stack pointer of the next task to run.
+ *
+ * @param a0 Former mstatus
+ *
+ * @returns New mstatus (potentially with coprocessors disabled)
  */
     .global rtos_int_exit
     .type rtos_int_exit, @function
 rtos_int_exit:
+    /* To speed up this routine and because this current routine is only meant to be called from the interrupt
+     * handler, let's use callee-saved registers instead of stack space. Registers `s3-s11` are not used by
+     * the caller */
+    mv      s11, a0
 #if ( configNUM_CORES > 1 )
     csrr    a1, mhartid                     /* a1 = coreID */
     slli    a1, a1, 2                       /* a1 = a1 * 4 */
@@ -120,21 +338,21 @@ rtos_int_exit:
 #else
     lw      a0, port_xSchedulerRunning      /* a0 = port_xSchedulerRunning */
 #endif /* ( configNUM_CORES > 1 ) */
-    beqz    a0, rtos_int_exit_end           /* if (port_uxSchewdulerRunning == 0) jump to rtos_int_exit_end */
+    beqz    a0, rtos_int_exit_end           /* if (port_uxSchedulerRunning == 0) jump to rtos_int_exit_end */
 
     /* Update nesting interrupts counter */
-    la      a0, port_uxInterruptNesting     /* a0 = &port_uxInterruptNesting */
+    la      a2, port_uxInterruptNesting     /* a2 = &port_uxInterruptNesting */
 #if ( configNUM_CORES > 1 )
-    add     a0, a0, a1                      /* a0 = &port_uxInterruptNesting[coreID] // a1 already contains coreID * 4 */
+    add     a2, a2, a1                      /* a2 = &port_uxInterruptNesting[coreID] // a1 already contains coreID * 4 */
 #endif /* ( configNUM_CORES > 1 ) */
-    lw      a2, 0(a0)                       /* a2 = port_uxInterruptNesting[coreID] */
+    lw      a0, 0(a2)                       /* a0 = port_uxInterruptNesting[coreID] */
 
     /* Already zero, protect against underflow */
-    beqz    a2, isr_skip_decrement          /* if (port_uxInterruptNesting[coreID] == 0) jump to isr_skip_decrement */
-    addi    a2, a2, -1                      /* a2 = a2 - 1 */
-    sw      a2, 0(a0)                       /* port_uxInterruptNesting[coreID] = a2 */
+    beqz    a0, isr_skip_decrement          /* if (port_uxInterruptNesting[coreID] == 0) jump to isr_skip_decrement */
+    addi    a0, a0, -1                      /* a0 = a0 - 1 */
+    sw      a0, 0(a2)                       /* port_uxInterruptNesting[coreID] = a0 */
     /* May still have interrupts pending, skip section below and exit */
-    bnez    a2, rtos_int_exit_end
+    bnez    a0, rtos_int_exit_end
 
 isr_skip_decrement:
     /* If the CPU reached this label, a2 (uxInterruptNesting) is 0 for sure */
@@ -147,11 +365,27 @@ isr_skip_decrement:
     lw      a2, 0(a0)                       /* a2 = xPortSwitchFlag[coreID] */
     beqz    a2, no_switch                   /* if (xPortSwitchFlag[coreID] == 0) jump to no_switch */
 
-    /* Preserve return address and schedule next task. To speed up the process, instead of allocating stack
-     * space, let's use a callee-saved register: s0. Since the caller is not using it, let's use it. */
-    mv      s0, ra
+    /* Preserve return address and schedule next task. To speed up the process, and because this current routine
+     * is only meant to be called from the interrupt handler, let's save some speed and space by using callee-saved
+     * registers instead of stack space. Registers `s3-s11` are not used by the caller */
+    mv      s10, ra
+#if ( SOC_CPU_COPROC_NUM > 0 )
+    /* In the cases where the newly scheduled task is different from the previously running one,
+     * we have to disable the coprocessor(s) to let them trigger an exception on first use.
+     * Else, if the same task is scheduled, do not change the coprocessor(s) state. */
+    call    rtos_current_tcb
+    mv      s9, a0
+    call    vTaskSwitchContext
+    call    rtos_current_tcb
+    beq     a0, s9, rtos_int_exit_no_change
+    /* Disable the coprocessors in s11 register (former mstatus) */
+    li      a0, ~CSR_MSTATUS_FPU_DISABLE
+    and     s11, s11, a0
+rtos_int_exit_no_change:
+#else /* ( SOC_CPU_COPROC_NUM == 0 ) */
     call    vTaskSwitchContext
-    mv      ra, s0
+#endif /* ( SOC_CPU_COPROC_NUM > 0 ) */
+    mv      ra, s10
 
     /* Clears the switch pending flag */
     la      a0, xPortSwitchFlag             /* a0 = &xPortSwitchFlag */
@@ -198,4 +432,5 @@ no_switch:
 #endif /* CONFIG_ESP_SYSTEM_HW_STACK_GUARD */
 
 rtos_int_exit_end:
+    mv      a0, s11                         /* a0 = new mstatus */
     ret

+ 13 - 8
components/hal/spi_flash_hal.c

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -22,26 +22,31 @@ static const char *TAG = "flash_hal";
 
 static uint32_t get_flash_clock_divider(const spi_flash_hal_config_t *cfg)
 {
-    int clk_source = cfg->clock_src_freq;
+    const int clk_source   = cfg->clock_src_freq;
+    const int clk_freq_mhz = cfg->freq_mhz;
     // On ESP32, ESP32-S2, ESP32-C3, we allow specific frequency 26.666MHz
     // If user passes freq_mhz like 26 or 27, it's allowed to use integer divider 3.
     // However on other chips or on other frequency, we only allow user pass frequency which
     // can be integer divided. If no, the following strategy is round up the division and
     // round down flash frequency to keep it safe.
     int best_div = 0;
-    if (clk_source < cfg->freq_mhz) {
-        HAL_LOGE(TAG, "Target frequency %dMHz higher than supported.", cfg->freq_mhz);
+    if (clk_source < clk_freq_mhz) {
+        HAL_LOGE(TAG, "Target frequency %dMHz higher than supported.", clk_freq_mhz);
         abort();
     }
 #if CONFIG_IDF_TARGET_ESP32 || CONFIG_IDF_TARGET_ESP32S2 || CONFIG_IDF_TARGET_ESP32C3
-    if (cfg->freq_mhz == 26 || cfg->freq_mhz == 27) {
+    if (clk_freq_mhz == 26 || clk_freq_mhz == 27) {
         best_div = 3;
     } else
 #endif
     {
-        best_div = (int)ceil((double)clk_source / (double)cfg->freq_mhz);
-        if ((cfg->clock_src_freq % cfg->freq_mhz) != 0) {
-            HAL_LOGW(TAG, "Flash clock frequency round down to %d", (int)floor((double)clk_source / (double)best_div));
+        /* Do not use float/double as the FPU may not have been initialized yet on startup.
+         * The values are in MHz, so for sure we won't have an overflow by adding them. */
+        best_div = (clk_source + clk_freq_mhz - 1) / clk_freq_mhz;
+        /* Perform a division that returns both quotient and remainder */
+        const div_t res = div(clk_source, clk_freq_mhz);
+        if (res.rem != 0) {
+            HAL_LOGW(TAG, "Flash clock frequency round down to %d", res.quot);
         }
     }
 

+ 46 - 0
components/riscv/include/riscv/rv_utils.h

@@ -24,6 +24,21 @@ extern "C" {
 #define CSR_PCMR_MACHINE    0x7e1
 #define CSR_PCCR_MACHINE    0x7e2
 
+#if SOC_CPU_HAS_FPU
+
+/* FPU bits in mstatus start at bit 13 */
+#define CSR_MSTATUS_FPU_SHIFT       13
+/* FPU registers are clean if bits are 0b10 */
+#define CSR_MSTATUS_FPU_CLEAN_STATE 2
+/* FPU status in mstatus are represented with two bits */
+#define CSR_MSTATUS_FPU_MASK        3
+/* FPU is enabled when writing 1 to FPU bits */
+#define CSR_MSTATUS_FPU_ENA         BIT(13)
+/* Set FPU registers state to clean (after being dirty) */
+#define CSR_MSTATUS_FPU_CLEAR       BIT(13)
+
+#endif /* SOC_CPU_HAS_FPU */
+
 /* SW defined level which the interrupt module will mask interrupt with priority less than threshold during critical sections
    and spinlocks */
 #define RVHAL_EXCM_LEVEL    4
@@ -222,6 +237,37 @@ FORCE_INLINE_ATTR void rv_utils_intr_global_disable(void)
     RV_CLEAR_CSR(mstatus, MSTATUS_MIE);
 }
 
+
+#if SOC_CPU_HAS_FPU
+
+/* ------------------------------------------------- FPU Related ----------------------------------------------------
+ *
+ * ------------------------------------------------------------------------------------------------------------------ */
+
+FORCE_INLINE_ATTR bool rv_utils_enable_fpu(void)
+{
+    /* Set mstatus[14:13] to 0b01 to start the floating-point unit initialization */
+    RV_SET_CSR(mstatus, CSR_MSTATUS_FPU_ENA);
+    /* On the ESP32-P4, the FPU can be used directly after setting `mstatus` bit 13.
+     * Since the interrupt handler expects the FPU states to be either 0b10 or 0b11,
+     * let's write the FPU CSR and clear the dirty bit afterwards. */
+    RV_WRITE_CSR(fcsr, 1);
+    RV_CLEAR_CSR(mstatus, CSR_MSTATUS_FPU_CLEAR);
+    const uint32_t mstatus = RV_READ_CSR(mstatus);
+    /* Make sure the FPU state is 0b10 (clean registers) */
+    return ((mstatus >> CSR_MSTATUS_FPU_SHIFT) & CSR_MSTATUS_FPU_MASK) == CSR_MSTATUS_FPU_CLEAN_STATE;
+}
+
+
+FORCE_INLINE_ATTR void rv_utils_disable_fpu(void)
+{
+    /* Clear mstatus[14:13] bits to disable the floating-point unit */
+    RV_CLEAR_CSR(mstatus, CSR_MSTATUS_FPU_MASK << CSR_MSTATUS_FPU_SHIFT);
+}
+
+#endif /* SOC_CPU_HAS_FPU */
+
+
 /* -------------------------------------------------- Memory Ports -----------------------------------------------------
  *
  * ------------------------------------------------------------------------------------------------------------------ */

+ 77 - 1
components/riscv/include/riscv/rvruntime-frames.h

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -7,6 +7,8 @@
 #ifndef __RVRUNTIME_FRAMES_H__
 #define __RVRUNTIME_FRAMES_H__
 
+#include "soc/soc_caps.h"
+
 /* Align a value up to nearest n-byte boundary, where n is a power of 2. */
 #define ALIGNUP(n, val) (((val) + (n) - 1) & -(n))
 
@@ -82,6 +84,80 @@ STRUCT_FIELD (long, 4, RV_STK_MTVAL,   mtval)      /* Machine Trap Value */
 STRUCT_FIELD (long, 4, RV_STK_MHARTID, mhartid)    /* Hardware Thread ID in machine mode */
 STRUCT_END(RvExcFrame)
 
+#if SOC_CPU_COPROC_NUM > 0
+
+#if SOC_CPU_HAS_FPU
+/**
+ * @brief Floating-Point Unit save area
+ */
+STRUCT_BEGIN
+STRUCT_FIELD (long, 4, RV_FPU_FT0,  ft0)    /* ft0-ft7: Floating Point temporaries */
+STRUCT_FIELD (long, 4, RV_FPU_FT1,  ft1)
+STRUCT_FIELD (long, 4, RV_FPU_FT2,  ft2)
+STRUCT_FIELD (long, 4, RV_FPU_FT3,  ft3)
+STRUCT_FIELD (long, 4, RV_FPU_FT4,  ft4)
+STRUCT_FIELD (long, 4, RV_FPU_FT5,  ft5)
+STRUCT_FIELD (long, 4, RV_FPU_FT6,  ft6)
+STRUCT_FIELD (long, 4, RV_FPU_FT7,  ft7)
+STRUCT_FIELD (long, 4, RV_FPU_FS0,  fs0)    /* fs0-fs1: Floating Point saved registers */
+STRUCT_FIELD (long, 4, RV_FPU_FS1,  fs1)
+STRUCT_FIELD (long, 4, RV_FPU_FA0,  fa0)    /* fa0-fa1: Floating Point arguments/return values */
+STRUCT_FIELD (long, 4, RV_FPU_FA1,  fa1)
+STRUCT_FIELD (long, 4, RV_FPU_FA2,  fa2)    /* fa2-fa7: Floating Point arguments */
+STRUCT_FIELD (long, 4, RV_FPU_FA3,  fa3)
+STRUCT_FIELD (long, 4, RV_FPU_FA4,  fa4)
+STRUCT_FIELD (long, 4, RV_FPU_FA5,  fa5)
+STRUCT_FIELD (long, 4, RV_FPU_FA6,  fa6)
+STRUCT_FIELD (long, 4, RV_FPU_FA7,  fa7)
+STRUCT_FIELD (long, 4, RV_FPU_FS2,  fs2)    /* fs2-fs11: Floating Point saved registers */
+STRUCT_FIELD (long, 4, RV_FPU_FS3,  fs3)
+STRUCT_FIELD (long, 4, RV_FPU_FS4,  fs4)
+STRUCT_FIELD (long, 4, RV_FPU_FS5,  fs5)
+STRUCT_FIELD (long, 4, RV_FPU_FS6,  fs6)
+STRUCT_FIELD (long, 4, RV_FPU_FS7,  fs7)
+STRUCT_FIELD (long, 4, RV_FPU_FS8,  fs8)
+STRUCT_FIELD (long, 4, RV_FPU_FS9,  fs9)
+STRUCT_FIELD (long, 4, RV_FPU_FS10, fs10)
+STRUCT_FIELD (long, 4, RV_FPU_FS11, fs11)
+STRUCT_FIELD (long, 4, RV_FPU_FT8,  ft8)    /* ft8-ft11: Floating Point temporary registers */
+STRUCT_FIELD (long, 4, RV_FPU_FT9,  ft9)
+STRUCT_FIELD (long, 4, RV_FPU_FT10, ft10)
+STRUCT_FIELD (long, 4, RV_FPU_FT11, ft11)
+STRUCT_FIELD (long, 4, RV_FPU_FCSR, fcsr)   /* fcsr special register */
+STRUCT_END(RvFPUSaveArea)
+
+/* Floating-Point Unit coprocessor is now considered coprocessor 0 */
+#define FPU_COPROC_IDX  0
+/* PIE/AIA coprocessor is coprocessor 1 */
+#define PIE_COPROC_IDX  1
+
+/* Define the size of each coprocessor save area */
+#if defined(_ASMLANGUAGE) || defined(__ASSEMBLER__)
+#define RV_COPROC0_SIZE RvFPUSaveAreaSize
+#define RV_COPROC1_SIZE 0   // PIE/AIA coprocessor area
+#else
+#define RV_COPROC0_SIZE sizeof(RvFPUSaveArea)
+#define RV_COPROC1_SIZE 0   // PIE/AIA coprocessor area
+#endif /* defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) */
+
+#endif /* SOC_CPU_HAS_FPU */
+
+/**
+ * @brief Coprocessors save area, containing each coprocessor save area
+ */
+STRUCT_BEGIN
+/* Enable bitmap: BIT(i) represents coprocessor i, 1 is used, 0 else */
+STRUCT_FIELD  (long, 4, RV_COPROC_ENABLE, sa_enable)
+/* Address of the pool of memory used to allocate coprocessors save areas */
+STRUCT_FIELD  (long, 4, RV_COPROC_ALLOCATOR, sa_allocator)
+/* Pointer to the coprocessors save areas */
+STRUCT_AFIELD (void*, 4, RV_COPROC_SA, sa_coprocs, SOC_CPU_COPROC_NUM)
+STRUCT_END(RvCoprocSaveArea)
+
+
+#endif /* SOC_CPU_COPROC_NUM > 0 */
+
+
 #if defined(_ASMLANGUAGE) || defined(__ASSEMBLER__)
 #define RV_STK_SZ1     RvExcFrameSize
 #else

+ 76 - 30
components/riscv/vectors.S

@@ -14,9 +14,15 @@
 
     .equ SAVE_REGS, 32
     .equ CONTEXT_SIZE, (SAVE_REGS * 4)
+    .equ EXC_ILLEGAL_INSTRUCTION, 0x2
     .equ panic_from_exception, xt_unhandled_exception
     .equ panic_from_isr, panicHandler
 
+#if ( SOC_CPU_COPROC_NUM > 0 )
+    /* Targets with coprocessors present a special CSR to get Illegal Instruction exception reason */
+    .equ EXT_ILL_CSR, 0x7F0
+#endif /* SOC_CPU_COPROC_NUM > 0 */
+
 /* Macro which first allocates space on the stack to save general
  * purpose registers, and then save them. GP register is excluded.
  * The default size allocated on the stack is CONTEXT_SIZE, but it
@@ -100,8 +106,10 @@
     csrw    mepc, t0
 .endm
 
+
     .global rtos_int_enter
     .global rtos_int_exit
+    .global rtos_save_fpu_coproc
     .global _global_interrupt_handler
 #ifdef CONFIG_ESP_SYSTEM_GDBSTUB_RUNTIME
     .global gdbstub_handle_debug_int
@@ -130,10 +138,56 @@ _panic_handler:
     sw    t0, RV_STK_MSTATUS(sp)
     csrr  t0, mtvec
     sw    t0, RV_STK_MTVEC(sp)
-    csrr  t0, mtval
-    sw    t0, RV_STK_MTVAL(sp)
     csrr  t0, mhartid
     sw    t0, RV_STK_MHARTID(sp)
+    csrr  t0, mtval
+    sw    t0, RV_STK_MTVAL(sp)
+
+    /* Keep mcause in s0, only the exception code and interrupt bit are relevant */
+    csrr  s0, mcause
+    li    t1, VECTORS_MCAUSE_INTBIT_MASK | VECTORS_MCAUSE_REASON_MASK
+    and   s0, s0, t1
+
+#if ( SOC_CPU_COPROC_NUM > 0 )
+    /* Check if the exception was cause by a coprocessor instruction. If this is the case, we have
+     * to lazily save the registers inside the current owner's save area */
+    /* Check if the exception is Illegal instruction */
+    li    a1, EXC_ILLEGAL_INSTRUCTION
+    bne   s0, a1, _panic_handler_not_coproc
+    /* In case this is due to a coprocessor, set ra right now to simplify the logic below */
+    la    ra, _return_from_exception
+    /* EXT_ILL CSR should contain the reason for the Illegal Instruction. */
+    csrr  a0, EXT_ILL_CSR
+    bnez  a0, _panic_handler_coproc
+#if SOC_CPU_HAS_FPU_EXT_ILL_BUG && SOC_CPU_HAS_FPU
+    /* If the SOC present the hardware EXT_ILL CSR bug, it doesn't support FPU load/store detection
+     * so we have to check the instruction's opcode (in `mtval` = `t0`) */
+    andi  a0, t0, 0b1011111
+    li    a1, 0b0000111
+    /* If opcode is of the form 0b0x00111, the instruction is FLW or FSW */
+    beq   a0, a1, rtos_save_fpu_coproc
+    /* Check the compressed instructions: C.FLW, C.FSW, C.FLWSP and C.FSWP.
+     * All of them have their highest 3 bits to x11 and the lowest bit to 0 */
+    li    a0, 0x6001
+    and   a0, t0, a0    /* a0 = mtval & 0x6001 */
+    li    a1, 0x6000
+    beq   a0, a1, rtos_save_fpu_coproc
+    /* The instruction was not an FPU one, continue the exception */
+#endif /* SOC_CPU_HAS_FPU_EXT_ILL_BUG && SOC_CPU_HAS_FPU */
+    j _panic_handler_not_coproc
+_panic_handler_coproc:
+    /* EXT_ILL CSR reasons are stored as follows:
+     * - Bit 0: FPU core instruction (Load/Store instructions NOT concerned)
+     * - Bit 1: Low-power core
+     * - Bit 2: PIE core
+     */
+#if SOC_CPU_HAS_FPU
+    li    a1, 1
+    beq   a0, a1, rtos_save_fpu_coproc
+#endif /* SOC_CPU_HAS_FPU */
+    /* Ignore LP and PIE for now, continue the exception */
+_panic_handler_not_coproc:
+#endif /* ( SOC_CPU_COPROC_NUM > 0 ) */
 
     /* Call panic_from_exception(sp) or panic_from_isr(sp)
      * depending on whether we have a pseudo excause or not.
@@ -141,11 +195,7 @@ _panic_handler:
      * so we have a pseudo excause. Else, it is due to a exception, we don't
      * have an pseudo excause */
     mv    a0, sp
-    csrr  a1, mcause
-
-    /* Only keep the interrupt bit and the source cause of the trap */
-    li t1, VECTORS_MCAUSE_INTBIT_MASK | VECTORS_MCAUSE_REASON_MASK
-    and a1, a1, t1
+    mv    a1, s0
 
     /* Branches instructions don't accept immediate values, so use t1 to
      * store our comparator */
@@ -156,10 +206,9 @@ _panic_handler:
     li    t0, 3
     beq   a1, t0, _call_gdbstub_handler
 #endif
-    /* exception_from_panic never returns */
-    jal panic_from_exception
+    call  panic_from_exception
     /* We arrive here if the exception handler has returned. */
-    j _return_from_exception
+    j     _return_from_exception
 
 #ifdef CONFIG_ESP_SYSTEM_GDBSTUB_RUNTIME
 _call_gdbstub_handler:
@@ -168,17 +217,15 @@ _call_gdbstub_handler:
 #endif
 
 _call_panic_handler:
-    /* Remove highest bit from mcause (a1) register and save it in the
-     * structure */
+    /* Remove highest bit from mcause (a1) register and save it in the structure */
     not   t0, t0
     and   a1, a1, t0
 #if CONFIG_SOC_INT_CLIC_SUPPORTED
     /* When CLIC is supported, external interrupts are shifted by 16, deduct this difference from mcause */
-    add a1, a1, -16
+    add   a1, a1, -16
 #endif // CONFIG_SOC_INT_CLIC_SUPPORTED
     sw    a1, RV_STK_MCAUSE(sp)
-    jal panic_from_isr
-
+    call  panic_from_isr
     /* We arrive here if the exception handler has returned. This means that
      * the exception was handled, and the execution flow should resume.
      * Restore the registers and return from the exception.
@@ -195,10 +242,9 @@ _return_from_exception:
 
 
     /* This is the interrupt handler.
-     * It saves the registers on the stack,
-     * prepares for interrupt nesting,
-     * re-enables the interrupts,
-     * then jumps to the C dispatcher in interrupt.c.
+     * It saves the registers on the stack, prepares for interrupt nesting, re-enables the interrupts,
+     * then jumps to the C dispatcher in interrupt.c. Upon return, the register context will be restored
+     * from the stack.
      */
     .global _interrupt_handler
     .type _interrupt_handler, @function
@@ -213,11 +259,12 @@ _interrupt_handler:
      * the backtrace of threads preempted by interrupts (OS tick etc.).
      * GP is saved just to have its proper value in GDB. */
     /* As gp register is not saved by the macro, save it here */
-    sw    gp, RV_STK_GP(sp)
+    sw      gp, RV_STK_GP(sp)
     /* Same goes for the SP value before trapping */
-    addi  t0, sp, CONTEXT_SIZE /* restore sp with the value when interrupt happened */
-    /* Save SP */
-    sw    t0, RV_STK_SP(sp)
+    addi    a0, sp, CONTEXT_SIZE /* restore sp with the value when interrupt happened */
+
+    /* Save SP former value */
+    sw      a0, RV_STK_SP(sp)
 
     /* Notify the RTOS that an interrupt ocurred, it will save the current stack pointer
      * in the running TCB, no need to pass it as a parameter */
@@ -245,8 +292,7 @@ _interrupt_handler:
     fence
 #endif // !SOC_INT_HW_NESTED_SUPPORTED
 
-    li      t0, 0x8
-    csrrs   t0, mstatus, t0
+    csrsi   mstatus, 0x8
     /* MIE set. Nested interrupts can now occur */
 
     #ifdef CONFIG_PM_TRACE
@@ -275,8 +321,7 @@ _interrupt_handler:
 
     /* After dispatch c handler, disable interrupt to make freertos make context switch */
 
-    li      t0, 0x8
-    csrrc   t0, mstatus, t0
+    csrci   mstatus, 0x8
     /* MIE cleared. Nested interrupts are disabled */
 
 #if !SOC_INT_HW_NESTED_SUPPORTED
@@ -286,7 +331,9 @@ _interrupt_handler:
     fence
 #endif // !SOC_INT_HW_NESTED_SUPPORTED
 
-    /* The RTOS will restore the current TCB stack pointer. This routine will preserve s1 and s2 but alter s0. */
+    /* The RTOS will restore the current TCB stack pointer. This routine will preserve s1 and s2.
+     * Returns the new `mstatus` value. */
+    mv      a0, s2      /* a0 = mstatus */
     call    rtos_int_exit
 
     /* Restore the rest of the registers.
@@ -294,10 +341,9 @@ _interrupt_handler:
      * the former CPU priority. When executing `mret`, the hardware will restore the former threshold,
      * from `mcause` to `mintstatus` CSR */
     csrw    mcause, s1
-    csrw    mstatus, s2
+    csrw    mstatus, a0
     restore_mepc
     restore_general_regs
-
     /* exit, this will also re-enable the interrupts */
     mret
     .size  _interrupt_handler, .-_interrupt_handler

+ 12 - 0
components/soc/esp32p4/include/soc/Kconfig.soc_caps.in

@@ -287,6 +287,18 @@ config SOC_BRANCH_PREDICTOR_SUPPORTED
     bool
     default y
 
+config SOC_CPU_HAS_FPU
+    bool
+    default y
+
+config SOC_CPU_HAS_FPU_EXT_ILL_BUG
+    bool
+    default y
+
+config SOC_CPU_COPROC_NUM
+    int
+    default 2
+
 config SOC_CPU_BREAKPOINTS_NUM
     int
     default 4

+ 3 - 0
components/soc/esp32p4/include/soc/soc_caps.h

@@ -149,6 +149,9 @@
 #define SOC_INT_CLIC_SUPPORTED          1
 #define SOC_INT_HW_NESTED_SUPPORTED     1       // Support for hardware interrupts nesting
 #define SOC_BRANCH_PREDICTOR_SUPPORTED  1
+#define SOC_CPU_HAS_FPU                 1
+#define SOC_CPU_HAS_FPU_EXT_ILL_BUG     1       // EXT_ILL CSR doesn't support FLW/FSW
+#define SOC_CPU_COPROC_NUM              2
 
 #define SOC_CPU_BREAKPOINTS_NUM         4
 #define SOC_CPU_WATCHPOINTS_NUM         4