Jelajahi Sumber

Merge branch 'feature/freertos_xtensa_disable_port_using_mpu_wrappers' into 'master'

FreeRTOS: Fix FPU bugs on Amazon SMP FreeRTOS, port over FPU updates to IDF FreeRTOS

Closes IDF-5803

See merge request espressif/esp-idf!20280
Darian 3 tahun lalu
induk
melakukan
b223baf318
22 mengubah file dengan 650 tambahan dan 403 penghapusan
  1. 5 0
      components/freertos/FreeRTOS-Kernel-SMP/portable/riscv/port.c
  2. 9 0
      components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/include/freertos/xtensa_rtos.h
  3. 37 22
      components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c
  4. 74 10
      components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/portasm.S
  5. 79 17
      components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xt_asm_utils.h
  6. 30 42
      components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S
  7. 35 85
      components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_vectors.S
  8. 5 0
      components/freertos/FreeRTOS-Kernel/portable/riscv/port.c
  9. 2 37
      components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/portmacro.h
  10. 9 0
      components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/xtensa_rtos.h
  11. 41 28
      components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c
  12. 102 22
      components/freertos/FreeRTOS-Kernel/portable/xtensa/portasm.S
  13. 63 1
      components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h
  14. 19 12
      components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S
  15. 42 37
      components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S
  16. 52 56
      components/freertos/FreeRTOS-Kernel/tasks.c
  17. 1 1
      components/freertos/esp_additions/include/freertos/FreeRTOSConfig.h
  18. 1 4
      components/freertos/linker.lf
  19. 41 25
      components/freertos/test_apps/freertos/port/test_fpu_in_task.c
  20. 2 0
      docs/doxygen-known-warnings.txt
  21. 0 1
      docs/doxygen/Doxyfile
  22. 1 3
      tools/test_idf_monitor/Makefile

+ 5 - 0
components/freertos/FreeRTOS-Kernel-SMP/portable/riscv/port.c

@@ -537,6 +537,11 @@ FORCE_INLINE_ATTR UBaseType_t uxInitialiseStackFrame(UBaseType_t uxStackPointer,
 
 StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxCode, void *pvParameters)
 {
+#ifdef __clang_analyzer__
+    // Teach clang-tidy that pxTopOfStack cannot be a pointer to const
+    volatile StackType_t * pxTemp = pxTopOfStack;
+    pxTopOfStack = pxTemp;
+#endif /*__clang_analyzer__ */
     /*
     HIGH ADDRESS
     |---------------------------| <- pxTopOfStack on entry

+ 9 - 0
components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/include/freertos/xtensa_rtos.h

@@ -170,6 +170,15 @@ The implementation may use only a2-4, a15 (all other regs must be preserved).
 // void* XT_RTOS_CP_STATE(void)
 #define XT_RTOS_CP_STATE    _frxt_task_coproc_state
 
+/*
+RTOS provided hook function that is called on every coprocessor exception. May
+only be called from assembly code and by the 'call0' instruction.
+The implementation may use only a2-4, a15 (all other regs must be preserved).
+*/
+// void XT_RTOS_CP_EXC_HOOK(void)
+#if XCHAL_CP_NUM > 0
+#define XT_RTOS_CP_EXC_HOOK   _frxt_coproc_exc_hook
+#endif
 
 /*******************************************************************************
 

+ 37 - 22
components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c

@@ -41,9 +41,25 @@
 
 _Static_assert(portBYTE_ALIGNMENT == 16, "portBYTE_ALIGNMENT must be set to 16");
 
-/*
-OS state variables
-*/
+/* ---------------------------------------------------- Variables ------------------------------------------------------
+ * - Various variables used to maintain the FreeRTOS port's state. Used from both port.c and various .S files
+ * - Constant offsets are used by assembly to jump to particular TCB members or a stack area (such as the CPSA). We use
+ *   C constants instead of preprocessor macros due to assembly lacking "offsetof()".
+ * ------------------------------------------------------------------------------------------------------------------ */
+
+#if XCHAL_CP_NUM > 0
+/* Offsets used to navigate to a task's CPSA on the stack */
+const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8);
+const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE;  /* Offset to start of the CPSA area on the stack. See uxInitialiseStackCPSA(). */
+#if configNUM_CORES > 1
+/* Offset to TCB_t.uxCoreAffinityMask member. Used to pin unpinned tasks that use the FPU. */
+const DRAM_ATTR uint32_t offset_uxCoreAffinityMask = offsetof(StaticTask_t, uxDummy25);
+#if configUSE_CORE_AFFINITY != 1
+#error "configUSE_CORE_AFFINITY must be 1 on multicore targets with coprocessor support"
+#endif
+#endif /* configNUM_CORES > 1 */
+#endif /* XCHAL_CP_NUM > 0 */
+
 volatile unsigned port_xSchedulerRunning[portNUM_PROCESSORS] = {0}; // Indicates whether scheduler is running on a per-core basis
 unsigned int port_interruptNesting[portNUM_PROCESSORS] = {0};  // Interrupt nesting level. Increased/decreased in portasm.c, _frxt_int_enter/_frxt_int_exit
 //FreeRTOS SMP Locks
@@ -423,12 +439,6 @@ static void vPortTaskWrapper(TaskFunction_t pxCode, void *pvParameters)
 }
 #endif
 
-const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8);
-#if ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
-const DRAM_ATTR uint32_t offset_uxCoreAffinityMask = offsetof(StaticTask_t, uxDummy25);
-#endif // ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
-const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE;
-
 /**
  * @brief Align stack pointer in a downward growing stack
  *
@@ -674,10 +684,15 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
                                      void * pvParameters )
 #endif
 {
+#ifdef __clang_analyzer__
+    // Teach clang-tidy that pxTopOfStack cannot be a pointer to const
+    volatile StackType_t * pxTemp = pxTopOfStack;
+    pxTopOfStack = pxTemp;
+#endif /*__clang_analyzer__ */
     /*
     HIGH ADDRESS
     |---------------------------| <- pxTopOfStack on entry
-    | Coproc Save Area          |
+    | Coproc Save Area          | (CPSA MUST BE FIRST)
     | ------------------------- |
     | TLS Variables             |
     | ------------------------- | <- Start of useable stack
@@ -697,7 +712,7 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
     configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);
 
 #if XCHAL_CP_NUM > 0
-    // Initialize the coprocessor save area
+    // Initialize the coprocessor save area. THIS MUST BE THE FIRST AREA due to access from _frxt_task_coproc_state()
     uxStackPointer = uxInitialiseStackCPSA(uxStackPointer);
     configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);
 #endif /* XCHAL_CP_NUM > 0 */
@@ -717,25 +732,25 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
 // -------------------- Co-Processor -----------------------
 #if ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
 
-void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xCoreID);
+void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xTargetCoreID);
 
 void vPortCleanUpCoprocArea( void *pxTCB )
 {
-    StackType_t *coproc_area;
-    BaseType_t xCoreID;
+    UBaseType_t uxCoprocArea;
+    BaseType_t xTargetCoreID;
 
-    /* Calculate the coproc save area in the stack from the TCB base */
-    coproc_area = ( StackType_t * ) ( ( uint32_t ) ( pxTCB + offset_pxEndOfStack ));
-    coproc_area = ( StackType_t * ) ( ( ( portPOINTER_SIZE_TYPE ) coproc_area ) & ( ~( ( portPOINTER_SIZE_TYPE ) portBYTE_ALIGNMENT_MASK ) ) );
-    coproc_area = ( StackType_t * ) ( ( ( uint32_t ) coproc_area - XT_CP_SIZE ) & ~0xf );
+    /* Get pointer to the task's coprocessor save area from TCB->pxEndOfStack. See uxInitialiseStackCPSA() */
+    uxCoprocArea = ( UBaseType_t ) ( ( ( StaticTask_t * ) pxTCB )->pxDummy8 );  /* Get TCB_t.pxEndOfStack */
+    uxCoprocArea = STACKPTR_ALIGN_DOWN(16, uxCoprocArea - XT_CP_SIZE);
 
     /* Extract core ID from the affinity mask */
-    xCoreID = __builtin_ffs( * ( UBaseType_t * ) ( pxTCB + offset_uxCoreAffinityMask ) );
-    assert( xCoreID >= 1 );
-    xCoreID -= 1;
+    xTargetCoreID = ( ( StaticTask_t * ) pxTCB )->uxDummy25 ;
+    xTargetCoreID = ( BaseType_t ) __builtin_ffs( ( int ) xTargetCoreID );
+    assert( xTargetCoreID >= 1 ); // __builtin_ffs always returns first set index + 1
+    xTargetCoreID -= 1;
 
     /* If task has live floating point registers somewhere, release them */
-    _xt_coproc_release( coproc_area, xCoreID );
+    _xt_coproc_release( (void *)uxCoprocArea, xTargetCoreID );
 }
 #endif // ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
 

+ 74 - 10
components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/portasm.S

@@ -34,32 +34,45 @@
 #define TOPOFSTACK_OFFS                 0x00    /* StackType_t *pxTopOfStack */
 
 .extern pxCurrentTCBs
+#if XCHAL_CP_NUM > 0
+/* Offsets used to get a task's coprocessor save area (CPSA) from its TCB */
 .extern offset_pxEndOfStack
 .extern offset_cpsa
+#if configNUM_CORES > 1
+/* Offset to TCB_t.uxCoreAffinityMask member. Used to pin unpinned tasks that use the FPU. */
+.extern offset_uxCoreAffinityMask
+#endif /* configNUM_CORES > 1 */
+#endif /* XCHAL_CP_NUM > 0 */
 
 /*
-Macro to get a task's coprocessor save area (CPSA) from its TCB
+--------------------------------------------------------------------------------
+  Macro get_cpsa_from_tcb - get the pointer to a task's CPSA from its TCB
+
+  Entry:
+  - "reg_A" contains a pointer to the task's TCB
+  Exit:
+  - "reg_A" contains a pointer to the task's CPSA
+  - "reg_B" clobbered
 
-Entry:
-- reg_A contains a pointer to the TCB
-Exit:
-- reg_A contains a pointer to the CPSA
-- reg_B destroyed
+  The two arguments must be different AR registers.
+--------------------------------------------------------------------------------
 */
+#if XCHAL_CP_NUM > 0
     .macro get_cpsa_from_tcb reg_A reg_B
-    // Get TCB.pxEndOfStack from reg_A
+    /* Get TCB.pxEndOfStack from reg_A */
     movi    \reg_B, offset_pxEndOfStack     /* Move &offset_pxEndOfStack into reg_B */
     l32i    \reg_B, \reg_B, 0               /* Load offset_pxEndOfStack into reg_B */
     add     \reg_A, \reg_A, \reg_B          /* Calculate &pxEndOfStack to reg_A (&TCB + offset_pxEndOfStack) */
     l32i    \reg_A, \reg_A, 0               /* Load TCB.pxEndOfStack into reg_A */
-    //Offset to start of coproc save area
+    /* Offset to start of CP save area */
     movi    \reg_B, offset_cpsa             /* Move &offset_cpsa into reg_B */
     l32i    \reg_B, \reg_B, 0               /* Load offset_cpsa into reg_B */
     sub     \reg_A, \reg_A, \reg_B          /* Subtract offset_cpsa from pxEndOfStack to get to start of CP save area (unaligned) */
-    //Align down start of CP save area to 16 byte boundary
+    /* Align down start of CP save area to 16 byte boundary */
     movi    \reg_B, ~(0xF)
-    and     \reg_A, \reg_A, \reg_B          /* Align CPSA pointer to 16 bytes */
+    and     \reg_A, \reg_A, \reg_B          /* Align CP save area pointer to 16 bytes */
     .endm
+#endif /* XCHAL_CP_NUM > 0 */
 
     .global     port_IntStack
     .global     port_switch_flag    //Required by sysview_tracing build
@@ -692,3 +705,54 @@ _frxt_task_coproc_state:
 2:  ret
 
 #endif /* XCHAL_CP_NUM > 0 */
+
+/*
+**********************************************************************************************************
+*                                        _frxt_coproc_exc_hook
+*                                   void _frxt_coproc_exc_hook(void)
+*
+* Implements the Xtensa RTOS porting layer's XT_RTOS_CP_EXC_HOOK function for FreeRTOS.
+*
+* May only be called from assembly code by the 'call0' instruction. Does NOT obey ABI conventions.
+* May use only a2-4, a15 (all other regs must be preserved).
+* See the detailed description of the XT_RTOS_ENTER macro in xtensa_rtos.h.
+*
+**********************************************************************************************************
+*/
+#if XCHAL_CP_NUM > 0
+
+    .globl  _frxt_coproc_exc_hook
+    .type   _frxt_coproc_exc_hook,@function
+    .align  4
+_frxt_coproc_exc_hook:
+
+    #if configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1
+    getcoreid a2                            /* a2 = xCurCoreID */
+    /* if (port_xSchedulerRunning[xCurCoreID] == 0) */
+    movi    a3, port_xSchedulerRunning
+    addx4   a3, a2, a3
+    l32i    a3, a3, 0
+    beqz    a3, 1f                          /* Scheduler hasn't started yet. Return. */
+    /* if (port_interruptNesting[xCurCoreID] != 0) */
+    movi    a3, port_interruptNesting
+    addx4   a3, a2, a3
+    l32i    a3, a3, 0
+    bnez    a3, 1f                          /* We are in an interrupt. Return. */
+    /* CP operations are incompatible with unpinned tasks. Thus we pin the task
+    to the current running core by updating its TCB.uxCoreAffinityMask field. */
+    movi    a3, pxCurrentTCBs
+    addx4   a3, a2, a3
+    l32i    a3, a3, 0                       /* a3 = pxCurrentTCBs[xCurCoreID] */
+    movi    a4, offset_uxCoreAffinityMask
+    l32i    a4, a4, 0                       /* a4 = offset_uxCoreAffinityMask */
+    add     a3, a3, a4                      /* a3 = &TCB.uxCoreAffinityMask */
+    ssl     a2                              /* Use xCurCoreID as left shift amount */
+    movi    a4, 1
+    sll     a4, a4                          /* a4 = (1 << xCurCoreID) */
+    s32i    a4, a3, 0                       /* TCB.uxCoreAffinityMask = a4 = (1 << xCurCoreID) */
+1:
+    #endif /* configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 */
+
+    ret
+
+#endif /* XCHAL_CP_NUM > 0 */

+ 79 - 17
components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xt_asm_utils.h

@@ -50,26 +50,88 @@
 
 .macro SPILL_ALL_WINDOWS
 #if XCHAL_NUM_AREGS == 64
-	and a12, a12, a12
-	rotw 3
-	and a12, a12, a12
-	rotw 3
-	and a12, a12, a12
-	rotw 3
-	and a12, a12, a12
-	rotw 3
-	and a12, a12, a12
-	rotw 4
+    and a12, a12, a12
+    rotw 3
+    and a12, a12, a12
+    rotw 3
+    and a12, a12, a12
+    rotw 3
+    and a12, a12, a12
+    rotw 3
+    and a12, a12, a12
+    rotw 4
 #elif XCHAL_NUM_AREGS == 32
-	and a12, a12, a12
-	rotw 3
-	and a12, a12, a12
-	rotw 3
-	and a4, a4, a4
-	rotw 2
+    and a12, a12, a12
+    rotw 3
+    and a12, a12, a12
+    rotw 3
+    and a4, a4, a4
+    rotw 2
 #else
 #error Unrecognized XCHAL_NUM_AREGS
 #endif
 .endm
 
-#endif
+/*
+--------------------------------------------------------------------------------
+  Macro spinlock_take
+
+  This macro will repeatedly attempt to atomically set a spinlock variable
+  using the s32c1i instruction. A spinlock is considered free if its value is 0.
+
+  Entry:
+  - "reg_A/B" as scratch registers
+  - "lock_var" spinlock variable's symbol
+  - Interrupts must already be disabled by caller
+  Exit:
+  - Spinlock set to current core's ID (PRID)
+  - "reg_A/B" clobbered
+--------------------------------------------------------------------------------
+*/
+
+#if portNUM_PROCESSORS > 1
+
+    .macro  spinlock_take       reg_A reg_B lock_var
+
+    movi    \reg_A, \lock_var               /* reg_A = &lock_var */
+.L_spinlock_loop:
+    movi    \reg_B, 0                       /* Load spinlock free value (0) into SCOMPARE1 */
+    wsr     \reg_B, SCOMPARE1
+    rsync                                   /* Ensure that SCOMPARE1 is set before s32c1i executes */
+    rsr     \reg_B, PRID                    /* Load the current core's ID into reg_B */
+    s32c1i  \reg_B, \reg_A, 0               /* Attempt *lock_var = reg_B */
+    bnez    \reg_B, .L_spinlock_loop        /* If the write was successful (i.e., lock was free), 0 will have been written back to reg_B */
+
+    .endm
+
+#endif /* portNUM_PROCESSORS > 1 */
+
+/*
+--------------------------------------------------------------------------------
+  Macro spinlock_release
+
+  This macro will release a spinlock variable previously taken by the
+  spinlock_take macro.
+
+  Entry:
+  - "reg_A/B" as scratch registers
+  - "lock_var" spinlock variable's symbol
+  - Interrupts must already be disabled by caller
+  Exit:
+  - "reg_A/B" clobbered
+--------------------------------------------------------------------------------
+*/
+
+#if portNUM_PROCESSORS > 1
+
+    .macro spinlock_release     reg_A reg_B lock_var
+
+    movi    \reg_A, \lock_var               /* reg_A = &lock_var */
+    movi    \reg_B, 0
+    s32i    \reg_B, \reg_A, 0               /* Release the spinlock (*reg_A = 0) */
+
+    .endm
+
+#endif /* portNUM_PROCESSORS > 1 */
+
+#endif /* __XT_ASM_UTILS_H */

+ 30 - 42
components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S

@@ -397,18 +397,16 @@ May be called when a thread terminates or completes but does not delete
 the co-proc save area, to avoid the exception handler having to save the
 thread's co-proc state before another thread can use it (optimization).
 
-Needs to be called on the processor the thread was running on. Unpinned threads
-won't have an entry here because they get pinned as soon they use a coprocessor.
-
 Entry Conditions:
     A2  = Pointer to base of co-processor state save area.
-    A3  = Core ID of the pinned task
+    A3  = Core ID of the task (must be pinned) whose coproc ownership we are
+          releasing.
 
 Exit conditions:
     None.
 
 Obeys ABI conventions per prototype:
-    void _xt_coproc_release(void * coproc_sa_base, BaseType_t xCoreID)
+    void _xt_coproc_release(void * coproc_sa_base, BaseType_t xTargetCoreID)
 
 *******************************************************************************/
 
@@ -421,43 +419,33 @@ Obeys ABI conventions per prototype:
     .align  4
 _xt_coproc_release:
     ENTRY0                                  /* a2 = base of save area */
-                                            /* a3 = core ID */
-
-    rsil    a7, XCHAL_EXCM_LEVEL            /* lock interrupts */
-
-    /* Aquire spinlock before proceeding with the routine.
-     * Refer _xt_coproc_exc for details on the puspose of
-     * the _xt_coproc_owner_sa_lock lock and its intended use.
-     */
-.L_spinlock_loop:
-    mov     a8, a3                          /* Save a copy of the core ID in a8 */
-    movi    a10, _xt_coproc_owner_sa_lock   /* a10 = base address of lock variable */
-    addx4   a10, a8, a10                    /* Use core ID in a8 to calculate the offset to the lock variable for the core */
-    movi    a11, 0                          /* a11 = 0 */
-    wsr     a11, scompare1                  /* scompare1 = a11 :- Expect the spinlock to be free (value = 0) */
-    movi    a11, 1                          /* a11 = 1 :- Write 1 to take the spinlock */
-    s32c1i  a11, a10, 0                     /* if (lock == scompare1) {tmp = lock; lock = a11; a11 = tmp} else {a11 = lock} */
-    bnez    a11, .L_spinlock_loop           /* if (a11 != 0) {loop} :- Keep spinning until the spinlock is available */
-
-    movi    a4, XCHAL_CP_MAX << 2
-    mull    a3, a3, a4
-    movi    a4, _xt_coproc_owner_sa         /* a4 = base of owner array */
-    add     a4, a4, a3
-
-    addi    a5, a4, XCHAL_CP_MAX << 2       /* a5 = top+1 of owner array */
-    movi    a6, 0                           /* a6 = 0 (unowned) */
-
-1:  l32i    a8, a4, 0                       /* a8 = owner at a4 */
-    bne     a2, a8, 2f                      /* if (coproc_sa_base == owner) */
-    s32i    a3, a4, 0                       /*   owner = unowned */
-2:  addi    a4, a4, 1<<2                    /* a4 = next entry in owner array */
-    bltu    a4, a5, 1b                      /* repeat until end of array */
-
-3:  wsr     a7, PS                          /* restore interrupts */
-
-    /* Release spinlock */
-    movi    a11, 0                          /* a11 = 0 */
-    s32ri   a11, a10, 0                     /* a10 = base address of lock variable. Write 0 to release the lock */
+                                            /* a3 = xTargetCoreID */
+
+    movi    a4, XCHAL_CP_MAX << 2           /* a4 = size of an owner array */
+    mull    a4, a3, a4                      /* a4 = offset to the owner array of the target core */
+    movi    a3, _xt_coproc_owner_sa         /* a3 = base of all owner arrays */
+    add     a3, a3, a4                      /* a3 = base of owner array of the target core */
+    addi    a4, a3, XCHAL_CP_MAX << 2       /* a4 = top+1 of owner array of the target core */
+    movi    a5, 0                           /* a5 = 0 (unowned) */
+
+    rsil    a6, XCHAL_EXCM_LEVEL            /* lock interrupts */
+#if portNUM_PROCESSORS > 1
+    /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock
+     * to ensure thread safe access of _xt_coproc_owner_sa between cores. */
+    spinlock_take a7 a8 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
+
+1:  l32i    a7, a3, 0                       /* a7 = owner at a3 */
+    bne     a2, a7, 2f                      /* if (coproc_sa_base == owner) */
+    s32i    a5, a3, 0                       /*   owner = unowned */
+2:  addi    a3, a3, 1<<2                    /* a3 = next entry in owner array */
+    bltu    a3, a4, 1b                      /* repeat until end of array */
+
+#if portNUM_PROCESSORS > 1
+    /* Release previously taken spinlock */
+    spinlock_release a7 a8 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
+    wsr     a6, PS                          /* restore interrupts */
 
     RET0
 

+ 35 - 85
components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_vectors.S

@@ -102,15 +102,8 @@
 #include "esp_private/panic_reason.h"
 #include "sdkconfig.h"
 #include "soc/soc.h"
+#include "xt_asm_utils.h"
 
-/*
-  Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used.
-  Please change this when the tcb structure is changed
-*/
-.extern pxCurrentTCBs
-#if ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
-.extern offset_uxCoreAffinityMask
-#endif // ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
 
 /*
 --------------------------------------------------------------------------------
@@ -859,22 +852,22 @@ _xt_coproc_owner_sa:
 /* Spinlock per core for accessing _xt_coproc_owner_sa array
  *
  * 0 = Spinlock available
- * 1 = Spinlock taken
+ * PRID = Spinlock taken
  *
  * The lock provides mutual exclusion for accessing the _xt_coproc_owner_sa array.
- * This array can be modified by both _xt_coproc_exc and _xt_coproc_release routines
- * simultaneously owing to the fact that the FreeRTOS SMP Kernel allows cross-core
- * task deletion. Therefore, the same memory location in the owner save-area array
- * could be modified at the same time.
+ * The array can be modified by multiple cores simultaneously (via _xt_coproc_exc
+ * and _xt_coproc_release). Therefore, this spinlock is defined to ensure thread
+ * safe access of the _xt_coproc_owner_sa array.
  */
+#if portNUM_PROCESSORS > 1
     .global _xt_coproc_owner_sa_lock
     .type   _xt_coproc_owner_sa_lock,@object
     .align  16                      /* minimize crossing cache boundaries */
 _xt_coproc_owner_sa_lock:
-    .space  (portNUM_PROCESSORS) << 2
+    .space  4
+#endif /* portNUM_PROCESSORS > 1 */
 
     .section .iram1,"ax"
-
     .align  4
 .L_goto_invalid:
     j   .L_xt_coproc_invalid    /* not in a thread (invalid) */
@@ -924,51 +917,15 @@ _xt_coproc_exc:
     s32i    a4,  sp, XT_STK_A4
     s32i    a15, sp, XT_STK_A15
 
-    /* Aquire spinlock before proceeding with the exception handler.
-     * (Refer _xt_coproc_release for competing routine for the lock.)
-     *
-     * [refactor-todo]: The spinlock aquire/release routine can be
-     * refactored in to a macro later if the need arises to use it
-     * at more than one place in the port assembler files.
-     */
-.L_spinlock_loop:
-    movi        a2, _xt_coproc_owner_sa_lock    /* a2 = base address of lock variable */
-    getcoreid   a0                              /* get the core ID in a0 to calculate the offset of the lock variable */
-    addx4       a2, a0, a2                      /* a2 = address of desired lock variable */
-    movi        a0, 0                           /* a0 = 0 */
-    wsr         a0, scompare1                   /* scompare1 = a0 :- Expect the spinlock to be free (value = 0) */
-    movi        a0, 1                           /* a0 = 1 :- Write 1 to take the spinlock */
-    s32c1i      a0, a2, 0                       /* if (lock == scompare1) {tmp = lock; lock = a0; a0 = tmp} else {a0 = lock} */
-    bnez        a0, .L_spinlock_loop            /* if (a0 != 0) {loop} :- Keep spinning until the spinlock is available */
+    /* Call the RTOS coprocessor exception hook */
+    call0   XT_RTOS_CP_EXC_HOOK
 
     /* Get co-processor state save area of new owner thread. */
     call0   XT_RTOS_CP_STATE                /* a15 = new owner's save area */
-    #if CONFIG_FREERTOS_FPU_IN_ISR
-    beqz    a15, .L_skip_core_pin           /* CP used in ISR, skip task pinning */
-    #else
+    #if !CONFIG_FREERTOS_FPU_IN_ISR
     beqz    a15, .L_goto_invalid            /* not in a thread (invalid) */
     #endif
 
-#if ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
-    /* CP operations are incompatible with unpinned tasks. Thus we pin the task
-    to the current running core. */
-    movi    a2, pxCurrentTCBs
-    getcoreid a3                            /* a3 = current core ID */
-    addx4   a2, a3, a2
-    l32i    a2, a2, 0                       /* a2 = start of pxCurrentTCBs[cpuid] */
-    movi    a4, offset_uxCoreAffinityMask
-    l32i    a4, a4, 0                       /* a4 = offset_uxCoreAffinityMask */
-    add     a2, a2, a4                      /* a2 = &TCB.uxCoreAffinityMask */
-    ssl     a3                              /* Use core ID as shift amount */
-    movi    a4, 1
-    sll     a4, a4                          /* a4 = uxCoreAffinityMask = (1 << core ID) */
-    s32i    a4, a2, 0                       /* Store affinity mask to TCB.uxCoreAffinityMask */
-#endif // ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
-
-#if CONFIG_FREERTOS_FPU_IN_ISR
-.L_skip_core_pin:
-#endif
-
     /* Enable the co-processor's bit in CPENABLE. */
     movi    a0, _xt_coproc_mask
     rsr     a4, CPENABLE                    /* a4 = CPENABLE */
@@ -978,17 +935,18 @@ _xt_coproc_exc:
     or      a4, a4, a2                      /* a4 = CPENABLE | (1 << n) */
     wsr     a4, CPENABLE
 
-/*
-Keep loading _xt_coproc_owner_sa[n] atomic (=load once, then use that value
-everywhere): _xt_coproc_release assumes it works like this in order not to need
-locking.
-*/
-    /* Grab correct xt_coproc_owner_sa for this core */
+    /* Grab the xt_coproc_owner_sa owner array for current core */
     getcoreid a3                            /* a3 = current core ID */
-    movi    a2, XCHAL_CP_MAX << 2
-    mull    a2, a2, a3                      /* multiply by current processor id */
-    movi    a3, _xt_coproc_owner_sa         /* a3 = base of owner array */
-    add     a3, a3, a2                      /* a3 = owner area needed for this processor */
+    movi    a2, XCHAL_CP_MAX << 2           /* a2 = size of an owner array */
+    mull    a2, a2, a3                      /* a2 = offset to the owner array of the current core */
+    movi    a3, _xt_coproc_owner_sa         /* a3 = base of all owner arrays */
+    add     a3, a3, a2                      /* a3 = base of owner array of the current core */
+
+#if portNUM_PROCESSORS > 1
+    /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock
+     * to ensure thread safe access of _xt_coproc_owner_sa between cores. */
+    spinlock_take a0 a2 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
 
     /* Get old coprocessor owner thread (save area ptr) and assign new one.  */
     addx4   a3,  a5, a3                      /* a3 = &_xt_coproc_owner_sa[n] */
@@ -996,13 +954,21 @@ locking.
     s32i    a15, a3, 0                       /* _xt_coproc_owner_sa[n] = new */
     rsync                                    /* ensure wsr.CPENABLE is complete */
 
+#if portNUM_PROCESSORS > 1
+    /* Release previously taken spinlock */
+    spinlock_release a0 a2 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
+
     /* Only need to context switch if new owner != old owner. */
     /* If float is necessary on ISR, we need to remove this check */
     /* below, because on restoring from ISR we may have new == old condition used
      * to force cp restore to next thread
+     * Todo: IDF-6418
      */
-    #ifndef CONFIG_FREERTOS_FPU_IN_ISR
-    beq     a15, a2, .L_goto_done           /* new owner == old, we're done */
+    #if !CONFIG_FREERTOS_FPU_IN_ISR
+    bne     a15, a2, .L_switch_context
+    j    .L_goto_done                       /* new owner == old, we're done */
+.L_switch_context:
     #endif
 
     /* If no old owner then nothing to save. */
@@ -1072,14 +1038,6 @@ locking.
     /* Restore interruptee's saved registers. */
     /* Can omit rsync for wsr.CPENABLE here because _xt_user_exit does it. */
 .L_xt_coproc_done:
-
-    /* Release spinlock */
-    movi        a2, _xt_coproc_owner_sa_lock    /* a2 = base address of the lock variable */
-    getcoreid   a0                              /* a0 = core ID to calculate the offset of the lock variable */
-    addx4       a2, a0, a2                      /* a2 = address of the lock variable */
-    movi        a0, 0                           /* a0 = 0 */
-    s32ri       a0, a2, 0                       /* a2 = a0 :- Write 0 to release the lock */
-
     l32i    a15, sp, XT_STK_A15
     l32i    a5,  sp, XT_STK_A5
     l32i    a4,  sp, XT_STK_A4
@@ -1107,14 +1065,6 @@ locking.
 
     /* Co-processor exception occurred outside a thread (not supported). */
 .L_xt_coproc_invalid:
-
-    /* Release spinlock */
-    movi        a2, _xt_coproc_owner_sa_lock    /* a2 = base address of the lock variable */
-    getcoreid   a0                              /* a0 = core ID to calculate the offset of the lock variable */
-    addx4       a2, a0, a2                      /* a2 = address of the lock variable */
-    movi        a0, 0                           /* a0 = 0 */
-    s32ri       a0, a2, 0                       /* a2 = a0 :- Write 0 to release the lock */
-
     movi    a0,PANIC_RSN_COPROCEXCEPTION
     wsr     a0,EXCCAUSE
     call0   _xt_panic                       /* not in a thread (invalid) */
@@ -1735,7 +1685,7 @@ _Level6Vector:
     .global     xt_nmi
     .align      4
 _NMIExceptionVector:
-    wsr     a0, EXCSAVE + XCHAL_NMILEVEL  _ /* preserve a0 */
+    wsr     a0, EXCSAVE + XCHAL_NMILEVEL   /* preserve a0 */
     call0   xt_nmi                         /* load interrupt handler */
     /* never returns here - call0 is used as a jump (see note at top) */
 
@@ -1856,9 +1806,9 @@ _xt_alloca_exc:
     wsr     a2, PS          /* update PS.OWB to new window base */
     rsync
 
-    _bbci.l a4, 31, _WindowUnderflow4
+    bbci.l a4, 31, _WindowUnderflow4
     rotw    -1              /* original a0 goes to a8 */
-    _bbci.l a8, 30, _WindowUnderflow8
+    bbci.l a8, 30, _WindowUnderflow8
     rotw    -1
     j               _WindowUnderflow12
 

+ 5 - 0
components/freertos/FreeRTOS-Kernel/portable/riscv/port.c

@@ -274,6 +274,11 @@ FORCE_INLINE_ATTR UBaseType_t uxInitialiseStackFrame(UBaseType_t uxStackPointer,
 
 StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxCode, void *pvParameters)
 {
+#ifdef __clang_analyzer__
+    // Teach clang-tidy that pxTopOfStack cannot be a pointer to const
+    volatile StackType_t * pxTemp = pxTopOfStack;
+    pxTopOfStack = pxTemp;
+#endif /*__clang_analyzer__ */
     /*
     HIGH ADDRESS
     |---------------------------| <- pxTopOfStack on entry

+ 2 - 37
components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/portmacro.h

@@ -642,48 +642,13 @@ FORCE_INLINE_ATTR BaseType_t xPortGetCoreID(void)
 /* ------------------------------------------------------ Misc ---------------------------------------------------------
  * - Miscellaneous porting macros
  * - These are not port of the FreeRTOS porting interface, but are used by other FreeRTOS dependent components
- * - [refactor-todo] Remove dependency on MPU wrappers by modifying TCB
  * ------------------------------------------------------------------------------------------------------------------ */
 
 // -------------------- Co-Processor -----------------------
 
-// When coprocessors are defined, we maintain a pointer to coprocessors area.
-// We currently use a hack: redefine field xMPU_SETTINGS in TCB block as a structure that can hold:
-// MPU wrappers, coprocessor area pointer, trace code structure, and more if needed.
-// The field is normally used for memory protection. FreeRTOS should create another general purpose field.
-typedef struct {
 #if XCHAL_CP_NUM > 0
-    volatile StackType_t *coproc_area; // Pointer to coprocessor save area; MUST BE FIRST
-#endif
-
-#if portUSING_MPU_WRAPPERS
-    // Define here mpu_settings, which is port dependent
-    int mpu_setting; // Just a dummy example here; MPU not ported to Xtensa yet
-#endif
-} xMPU_SETTINGS;
-
-// Main hack to use MPU_wrappers even when no MPU is defined (warning: mpu_setting should not be accessed; otherwise move this above xMPU_SETTINGS)
-#if (XCHAL_CP_NUM > 0) && !portUSING_MPU_WRAPPERS   // If MPU wrappers not used, we still need to allocate coproc area
-#undef portUSING_MPU_WRAPPERS
-#define portUSING_MPU_WRAPPERS 1   // Enable it to allocate coproc area
-#define MPU_WRAPPERS_H             // Override mpu_wrapper.h to disable unwanted code
-#define PRIVILEGED_FUNCTION
-#define PRIVILEGED_DATA
-#endif
-
-void _xt_coproc_release(volatile void *coproc_sa_base);
-
-/*
- * The structures and methods of manipulating the MPU are contained within the
- * port layer.
- *
- * Fills the xMPUSettings structure with the memory region information
- * contained in xRegions.
- */
-#if( portUSING_MPU_WRAPPERS == 1 )
-struct xMEMORY_REGION;
-void vPortStoreTaskMPUSettings( xMPU_SETTINGS *xMPUSettings, const struct xMEMORY_REGION *const xRegions, StackType_t *pxBottomOfStack, uint32_t usStackDepth ) PRIVILEGED_FUNCTION;
-void vPortReleaseTaskMPUSettings( xMPU_SETTINGS *xMPUSettings );
+void vPortCleanUpCoprocArea(void *pvTCB);
+#define portCLEAN_UP_COPROC(pvTCB)      vPortCleanUpCoprocArea(pvTCB)
 #endif
 
 // -------------------- Heap Related -----------------------

+ 9 - 0
components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/xtensa_rtos.h

@@ -170,6 +170,15 @@ The implementation may use only a2-4, a15 (all other regs must be preserved).
 // void* XT_RTOS_CP_STATE(void)
 #define XT_RTOS_CP_STATE    _frxt_task_coproc_state
 
+/*
+RTOS provided hook function that is called on every coprocessor exception. May
+only be called from assembly code and by the 'call0' instruction.
+The implementation may use only a2-4, a15 (all other regs must be preserved).
+*/
+// void XT_RTOS_CP_EXC_HOOK(void)
+#if XCHAL_CP_NUM > 0
+#define XT_RTOS_CP_EXC_HOOK   _frxt_coproc_exc_hook
+#endif
 
 /*******************************************************************************
 

+ 41 - 28
components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c

@@ -81,9 +81,21 @@ _Static_assert(tskNO_AFFINITY == CONFIG_FREERTOS_NO_AFFINITY, "incorrect tskNO_A
 
 
 /* ---------------------------------------------------- Variables ------------------------------------------------------
- *
+ * - Various variables used to maintain the FreeRTOS port's state. Used from both port.c and various .S files
+ * - Constant offsets are used by assembly to jump to particular TCB members or a stack area (such as the CPSA). We use
+ *   C constants instead of preprocessor macros due to assembly lacking "offsetof()".
  * ------------------------------------------------------------------------------------------------------------------ */
 
+#if XCHAL_CP_NUM > 0
+/* Offsets used to navigate to a task's CPSA on the stack */
+const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8);
+const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE;  /* Offset to start of the CPSA area on the stack. See uxInitialiseStackCPSA(). */
+#if configNUM_CORES > 1
+/* Offset to TCB_t.xCoreID member. Used to pin unpinned tasks that use the FPU. */
+const DRAM_ATTR uint32_t offset_xCoreID = offsetof(StaticTask_t, xDummyCoreID);
+#endif /* configNUM_CORES > 1 */
+#endif /* XCHAL_CP_NUM > 0 */
+
 volatile unsigned port_xSchedulerRunning[portNUM_PROCESSORS] = {0}; // Indicates whether scheduler is running on a per-core basis
 unsigned port_interruptNesting[portNUM_PROCESSORS] = {0};  // Interrupt nesting level. Increased/decreased in portasm.c, _frxt_int_enter/_frxt_int_exit
 BaseType_t port_uxCriticalNesting[portNUM_PROCESSORS] = {0};
@@ -389,16 +401,26 @@ FORCE_INLINE_ATTR UBaseType_t uxInitialiseStackFrame(UBaseType_t uxStackPointer,
     return uxStackPointer;
 }
 
-#if portUSING_MPU_WRAPPERS
-StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxCode, void *pvParameters, BaseType_t xRunPrivileged)
+#if ( portHAS_STACK_OVERFLOW_CHECKING == 1 )
+StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
+                                     StackType_t * pxEndOfStack,
+                                     TaskFunction_t pxCode,
+                                     void * pvParameters )
 #else
-StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxCode, void *pvParameters )
+StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
+                                     TaskFunction_t pxCode,
+                                     void * pvParameters )
 #endif
 {
+#ifdef __clang_analyzer__
+    // Teach clang-tidy that pxTopOfStack cannot be a pointer to const
+    volatile StackType_t * pxTemp = pxTopOfStack;
+    pxTopOfStack = pxTemp;
+#endif /*__clang_analyzer__ */
     /*
     HIGH ADDRESS
     |---------------------------| <- pxTopOfStack on entry
-    | Coproc Save Area          |
+    | Coproc Save Area          | (CPSA MUST BE FIRST)
     | ------------------------- |
     | TLS Variables             |
     | ------------------------- | <- Start of useable stack
@@ -417,7 +439,7 @@ StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxC
     // Make sure the incoming stack pointer is aligned on 16
     configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);
 #if XCHAL_CP_NUM > 0
-    // Initialize the coprocessor save area
+    // Initialize the coprocessor save area. THIS MUST BE THE FIRST AREA due to access from _frxt_task_coproc_state()
     uxStackPointer = uxInitialiseStackCPSA(uxStackPointer);
     // Each allocated section on the stack must have a size aligned on 16
     configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);
@@ -583,33 +605,24 @@ void vPortSetStackWatchpoint( void *pxStackStart )
     esp_cpu_set_watchpoint(STACK_WATCH_POINT_NUMBER, (char *)addr, 32, ESP_CPU_WATCHPOINT_STORE);
 }
 
-/* ---------------------------------------------- Misc Implementations -------------------------------------------------
- *
- * ------------------------------------------------------------------------------------------------------------------ */
-
 // -------------------- Co-Processor -----------------------
 
-/*
- * Used to set coprocessor area in stack. Current hack is to reuse MPU pointer for coprocessor area.
- */
-#if portUSING_MPU_WRAPPERS
-void vPortStoreTaskMPUSettings( xMPU_SETTINGS *xMPUSettings, const struct xMEMORY_REGION *const xRegions, StackType_t *pxBottomOfStack, uint32_t usStackDepth )
-{
 #if XCHAL_CP_NUM > 0
-    xMPUSettings->coproc_area = ( StackType_t * ) ( ( uint32_t ) ( pxBottomOfStack + usStackDepth - 1 ));
-    xMPUSettings->coproc_area = ( StackType_t * ) ( ( ( portPOINTER_SIZE_TYPE ) xMPUSettings->coproc_area ) & ( ~( ( portPOINTER_SIZE_TYPE ) portBYTE_ALIGNMENT_MASK ) ) );
-    xMPUSettings->coproc_area = ( StackType_t * ) ( ( ( uint32_t ) xMPUSettings->coproc_area - XT_CP_SIZE ) & ~0xf );
+void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xTargetCoreID);
 
+void vPortCleanUpCoprocArea(void *pvTCB)
+{
+    UBaseType_t uxCoprocArea;
+    BaseType_t xTargetCoreID;
 
-    /* NOTE: we cannot initialize the coprocessor save area here because FreeRTOS is going to
-     * clear the stack area after we return. This is done in pxPortInitialiseStack().
-     */
-#endif
-}
+    /* Get a pointer to the task's coprocessor save area */
+    uxCoprocArea = ( UBaseType_t ) ( ( ( StaticTask_t * ) pvTCB )->pxDummy8 );  /* Get TCB_t.pxEndOfStack */
+    uxCoprocArea = STACKPTR_ALIGN_DOWN(16, uxCoprocArea - XT_CP_SIZE);
+
+    /* Get xTargetCoreID from the TCB.xCoreID */
+    xTargetCoreID = ( ( StaticTask_t * ) pvTCB )->xDummyCoreID;
 
-void vPortReleaseTaskMPUSettings( xMPU_SETTINGS *xMPUSettings )
-{
     /* If task has live floating point registers somewhere, release them */
-    _xt_coproc_release( xMPUSettings->coproc_area );
+    _xt_coproc_release( (void *)uxCoprocArea, xTargetCoreID );
 }
-#endif /* portUSING_MPU_WRAPPERS */
+#endif /* XCHAL_CP_NUM > 0 */

+ 102 - 22
components/freertos/FreeRTOS-Kernel/portable/xtensa/portasm.S

@@ -32,9 +32,47 @@
 #include "sdkconfig.h"
 
 #define TOPOFSTACK_OFFS                 0x00    /* StackType_t *pxTopOfStack */
-#define CP_TOPOFSTACK_OFFS              0x04    /* xMPU_SETTINGS.coproc_area */
 
 .extern pxCurrentTCB
+#if XCHAL_CP_NUM > 0
+/* Offsets used to get a task's coprocessor save area (CPSA) from its TCB */
+.extern offset_pxEndOfStack
+.extern offset_cpsa
+#if configNUM_CORES > 1
+/* Offset to TCB_t.xCoreID member. Used to pin unpinned tasks that use the FPU. */
+.extern offset_xCoreID
+#endif /* configNUM_CORES > 1 */
+#endif /* XCHAL_CP_NUM > 0 */
+
+/*
+--------------------------------------------------------------------------------
+  Macro get_cpsa_from_tcb - get the pointer to a task's CPSA from its TCB
+
+  Entry:
+  - "reg_A" contains a pointer to the task's TCB
+  Exit:
+  - "reg_A" contains a pointer to the task's CPSA
+  - "reg_B" clobbered
+
+  The two arguments must be different AR registers.
+--------------------------------------------------------------------------------
+*/
+#if XCHAL_CP_NUM > 0
+    .macro get_cpsa_from_tcb reg_A reg_B
+    /* Get TCB.pxEndOfStack from reg_A */
+    movi    \reg_B, offset_pxEndOfStack     /* Move &offset_pxEndOfStack into reg_B */
+    l32i    \reg_B, \reg_B, 0               /* Load offset_pxEndOfStack into reg_B */
+    add     \reg_A, \reg_A, \reg_B          /* Calculate &pxEndOfStack to reg_A (&TCB + offset_pxEndOfStack) */
+    l32i    \reg_A, \reg_A, 0               /* Load TCB.pxEndOfStack into reg_A */
+    /* Offset to start of CP save area */
+    movi    \reg_B, offset_cpsa             /* Move &offset_cpsa into reg_B */
+    l32i    \reg_B, \reg_B, 0               /* Load offset_cpsa into reg_B */
+    sub     \reg_A, \reg_A, \reg_B          /* Subtract offset_cpsa from pxEndOfStack to get to start of CP save area (unaligned) */
+    /* Align down start of CP save area to 16 byte boundary */
+    movi    \reg_B, ~(0xF)
+    and     \reg_A, \reg_A, \reg_B          /* Align CP save area pointer to 16 bytes */
+    .endm
+#endif /* XCHAL_CP_NUM > 0 */
 
     .global     port_IntStack
     .global     port_switch_flag    //Required by sysview_tracing build
@@ -120,23 +158,19 @@ _frxt_int_enter:
     mull    a2,  a4, a2
     add     a1,  a1, a2                 /* for current proc */
 
-    #ifdef CONFIG_FREERTOS_FPU_IN_ISR
-    #if XCHAL_CP_NUM > 0
+    #if CONFIG_FREERTOS_FPU_IN_ISR && XCHAL_CP_NUM > 0
     rsr     a3, CPENABLE                /* Restore thread scope CPENABLE */
     addi    sp, sp,-4                   /* ISR will manage FPU coprocessor by forcing */
     s32i    a3, a1, 0                   /* its trigger */
     #endif
-    #endif
 
 .Lnested:
 1:
-    #ifdef CONFIG_FREERTOS_FPU_IN_ISR
-    #if XCHAL_CP_NUM > 0
+    #if CONFIG_FREERTOS_FPU_IN_ISR && XCHAL_CP_NUM > 0
     movi    a3,  0              /* whilst ISRs pending keep CPENABLE exception active */
     wsr     a3,  CPENABLE
     rsync
     #endif
-    #endif
 
     mov     a0,  a12                    /* restore return addr and return  */
     ret
@@ -174,14 +208,12 @@ _frxt_int_exit:
     s32i    a2,  a3, 0                  /* save nesting count              */
     bnez    a2,  .Lnesting              /* !=0 after decr so still nested  */
 
-    #ifdef CONFIG_FREERTOS_FPU_IN_ISR
-    #if XCHAL_CP_NUM > 0
+    #if CONFIG_FREERTOS_FPU_IN_ISR && XCHAL_CP_NUM > 0
     l32i    a3,  sp, 0                  /* Grab last CPENABLE before leave ISR */
     addi    sp,  sp, 4
     wsr     a3, CPENABLE
     rsync                               /* ensure CPENABLE was modified */
     #endif
-    #endif
 
     movi    a2,  pxCurrentTCB
     addx4   a2,  a4, a2
@@ -460,11 +492,11 @@ _frxt_dispatch:
 
     #if XCHAL_CP_NUM > 0
     /* Restore CPENABLE from task's co-processor save area. */
-    movi    a3, pxCurrentTCB            /* cp_state =                       */
-    getcoreid a2
-    addx4   a3,  a2, a3
-    l32i    a3, a3, 0
-    l32i    a2, a3, CP_TOPOFSTACK_OFFS     /* StackType_t                       *pxStack; */
+    movi    a2, pxCurrentTCB            /* cp_state =                       */
+    getcoreid a3
+    addx4   a2, a3, a2
+    l32i    a2, a2, 0
+    get_cpsa_from_tcb a2, a3            /* After this, pointer to CP save area is in a2, a3 is destroyed */
     l16ui   a3, a2, XT_CPENABLE         /* CPENABLE = cp_state->cpenable;   */
     wsr     a3, CPENABLE
     #endif
@@ -563,7 +595,7 @@ vPortYield:
 
     #if XCHAL_CP_NUM > 0
     /* Clear CPENABLE, also in task's co-processor state save area. */
-    l32i    a2,  a2, CP_TOPOFSTACK_OFFS /* a2 = pxCurrentTCB->cp_state      */
+    get_cpsa_from_tcb a2, a3            /* After this, pointer to CP save area is in a2, a3 is destroyed */
     movi    a3,  0
     wsr     a3,  CPENABLE
     beqz    a2,  1f
@@ -604,12 +636,12 @@ vPortYieldFromInt:
 
     #if XCHAL_CP_NUM > 0
     /* Save CPENABLE in task's co-processor save area, and clear CPENABLE.  */
-    movi    a3, pxCurrentTCB            /* cp_state =                       */
-    getcoreid a2
-    addx4   a3,  a2, a3
-    l32i    a3, a3, 0
+    movi    a2, pxCurrentTCB            /* cp_state =                       */
+    getcoreid a3
+    addx4   a2, a3, a2
+    l32i    a2, a2, 0
 
-    l32i    a2, a3, CP_TOPOFSTACK_OFFS
+    get_cpsa_from_tcb a2, a3            /* After this, pointer to CP save area is in a2, a3 is destroyed */
 
     rsr     a3, CPENABLE
     s16i    a3, a2, XT_CPENABLE         /* cp_state->cpenable = CPENABLE;   */
@@ -663,10 +695,58 @@ _frxt_task_coproc_state:
     l32i    a15, a15, 0                 /* && pxCurrentTCB != 0) {                 */
 
     beqz    a15, 2f
-    l32i    a15, a15, CP_TOPOFSTACK_OFFS
+    get_cpsa_from_tcb a15, a3           /* After this, pointer to CP save area is in a15, a3 is destroyed */
     ret
 
 1:  movi    a15, 0
 2:  ret
 
 #endif /* XCHAL_CP_NUM > 0 */
+
+/*
+**********************************************************************************************************
+*                                        _frxt_coproc_exc_hook
+*                                   void _frxt_coproc_exc_hook(void)
+*
+* Implements the Xtensa RTOS porting layer's XT_RTOS_CP_EXC_HOOK function for FreeRTOS.
+*
+* May only be called from assembly code by the 'call0' instruction. Does NOT obey ABI conventions.
+* May only use a2-4, a15 (all other regs must be preserved).
+* See the detailed description of the XT_RTOS_ENTER macro in xtensa_rtos.h.
+*
+**********************************************************************************************************
+*/
+#if XCHAL_CP_NUM > 0
+
+    .globl  _frxt_coproc_exc_hook
+    .type   _frxt_coproc_exc_hook,@function
+    .align  4
+_frxt_coproc_exc_hook:
+
+    #if configNUM_CORES > 1
+    getcoreid a2                            /* a2 = xCurCoreID */
+    /* if (port_xSchedulerRunning[xCurCoreID] == 0) */
+    movi    a3, port_xSchedulerRunning
+    addx4   a3, a2, a3
+    l32i    a3, a3, 0
+    beqz    a3, 1f                          /* Scheduler hasn't started yet. Return. */
+    /* if (port_interruptNesting[xCurCoreID] != 0) */
+    movi    a3, port_interruptNesting
+    addx4   a3, a2, a3
+    l32i    a3, a3, 0
+    bnez    a3, 1f                          /* We are in an interrupt. Return*/
+    /* CP operations are incompatible with unpinned tasks. Thus we pin the task
+    to the current running core. */
+    movi    a3, pxCurrentTCB
+    addx4   a3, a2, a3
+    l32i    a3, a3, 0                       /* a3 = pxCurrentTCB[xCurCoreID] */
+    movi    a4, offset_xCoreID
+    l32i    a4, a4, 0                       /* a4 = offset_xCoreID */
+    add     a3, a3, a4                      /* a3 = &TCB.xCoreID */
+    s32i    a2, a3, 0                       /* TCB.xCoreID = xCurCoreID */
+1:
+    #endif /* configNUM_CORES > 1 */
+
+    ret
+
+#endif /* XCHAL_CP_NUM > 0 */

+ 63 - 1
components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h

@@ -72,4 +72,66 @@
 #endif
 .endm
 
-#endif
+/*
+--------------------------------------------------------------------------------
+  Macro spinlock_take
+
+  This macro will repeatedly attempt to atomically set a spinlock variable
+  using the s32c1i instruction. A spinlock is considered free if its value is 0.
+
+  Entry:
+  - "reg_A/B" as scratch registers
+  - "lock_var" spinlock variable's symbol
+  - Interrupts must already be disabled by caller
+  Exit:
+  - Spinlock set to current core's ID (PRID)
+  - "reg_A/B" clobbered
+--------------------------------------------------------------------------------
+*/
+
+#if portNUM_PROCESSORS > 1
+
+    .macro  spinlock_take       reg_A reg_B lock_var
+
+    movi    \reg_A, \lock_var               /* reg_A = &lock_var */
+.L_spinlock_loop:
+    movi    \reg_B, 0                       /* Load spinlock free value (0) into SCOMPARE1 */
+    wsr     \reg_B, SCOMPARE1
+    rsync                                   /* Ensure that SCOMPARE1 is set before s32c1i executes */
+    rsr     \reg_B, PRID                    /* Load the current core's ID into reg_B */
+    s32c1i  \reg_B, \reg_A, 0               /* Attempt *lock_var = reg_B */
+    bnez    \reg_B, .L_spinlock_loop        /* If the write was successful (i.e., lock was free), 0 will have been written back to reg_B */
+
+    .endm
+
+#endif /* portNUM_PROCESSORS > 1 */
+
+/*
+--------------------------------------------------------------------------------
+  Macro spinlock_release
+
+  This macro will release a spinlock variable previously taken by the
+  spinlock_take macro.
+
+  Entry:
+  - "reg_A/B" as scratch registers
+  - "lock_var" spinlock variable's symbol
+  - Interrupts must already be disabled by caller
+  Exit:
+  - "reg_A/B" clobbered
+--------------------------------------------------------------------------------
+*/
+
+#if portNUM_PROCESSORS > 1
+
+    .macro spinlock_release     reg_A reg_B lock_var
+
+    movi    \reg_A, \lock_var               /* reg_A = &lock_var */
+    movi    \reg_B, 0
+    s32i    \reg_B, \reg_A, 0               /* Release the spinlock (*reg_A = 0) */
+
+    .endm
+
+#endif /* portNUM_PROCESSORS > 1 */
+
+#endif /* __XT_ASM_UTILS_H */

+ 19 - 12
components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S

@@ -397,17 +397,16 @@ May be called when a thread terminates or completes but does not delete
 the co-proc save area, to avoid the exception handler having to save the
 thread's co-proc state before another thread can use it (optimization).
 
-Needs to be called on the processor the thread was running on. Unpinned threads
-won't have an entry here because they get pinned as soon they use a coprocessor.
-
 Entry Conditions:
     A2  = Pointer to base of co-processor state save area.
+    A3  = Core ID of the task (must be pinned) whose coproc ownership we are
+          releasing.
 
 Exit conditions:
     None.
 
 Obeys ABI conventions per prototype:
-    void _xt_coproc_release(void * coproc_sa_base)
+    void _xt_coproc_release(void * coproc_sa_base, BaseType_t xTargetCoreID)
 
 *******************************************************************************/
 
@@ -420,17 +419,21 @@ Obeys ABI conventions per prototype:
     .align  4
 _xt_coproc_release:
     ENTRY0                                  /* a2 = base of save area */
+                                            /* a3 = xTargetCoreID */
 
-    getcoreid a5
-    movi    a3, XCHAL_CP_MAX << 2
-    mull    a5, a5, a3
-    movi    a3, _xt_coproc_owner_sa         /* a3 = base of owner array */
-    add     a3, a3, a5
-
-    addi    a4, a3, XCHAL_CP_MAX << 2       /* a4 = top+1 of owner array */
+    movi    a4, XCHAL_CP_MAX << 2           /* a4 = size of an owner array */
+    mull    a4, a3, a4                      /* a4 = offset to the owner array of the target core */
+    movi    a3, _xt_coproc_owner_sa         /* a3 = base of all owner arrays */
+    add     a3, a3, a4                      /* a3 = base of owner array of the target core */
+    addi    a4, a3, XCHAL_CP_MAX << 2       /* a4 = top+1 of owner array of the target core */
     movi    a5, 0                           /* a5 = 0 (unowned) */
 
     rsil    a6, XCHAL_EXCM_LEVEL            /* lock interrupts */
+#if portNUM_PROCESSORS > 1
+    /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock
+     * to ensure thread safe access of _xt_coproc_owner_sa between cores. */
+    spinlock_take a7 a8 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
 
 1:  l32i    a7, a3, 0                       /* a7 = owner at a3 */
     bne     a2, a7, 2f                      /* if (coproc_sa_base == owner) */
@@ -438,7 +441,11 @@ _xt_coproc_release:
 2:  addi    a3, a3, 1<<2                    /* a3 = next entry in owner array */
     bltu    a3, a4, 1b                      /* repeat until end of array */
 
-3:  wsr     a6, PS                          /* restore interrupts */
+#if portNUM_PROCESSORS > 1
+    /* Release previously taken spinlock */
+    spinlock_release a7 a8 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
+    wsr     a6, PS                          /* restore interrupts */
 
     RET0
 

+ 42 - 37
components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S

@@ -102,13 +102,8 @@
 #include "esp_private/panic_reason.h"
 #include "sdkconfig.h"
 #include "soc/soc.h"
+#include "xt_asm_utils.h"
 
-/*
-  Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used.
-  Please change this when the tcb structure is changed
-*/
-#define TASKTCB_XCOREID_OFFSET (0x38+configMAX_TASK_NAME_LEN+3)&~3
-.extern pxCurrentTCB
 
 /*
 --------------------------------------------------------------------------------
@@ -855,9 +850,25 @@ _xt_coproc_mask:
 _xt_coproc_owner_sa:
     .space  (XCHAL_CP_MAX * portNUM_PROCESSORS) << 2
 
-    .section .iram1,"ax"
-
+/* Spinlock per core for accessing _xt_coproc_owner_sa array
+ *
+ * 0 = Spinlock available
+ * PRID = Spinlock taken
+ *
+ * The lock provides mutual exclusion for accessing the _xt_coproc_owner_sa array.
+ * The array can be modified by multiple cores simultaneously (via _xt_coproc_exc
+ * and _xt_coproc_release). Therefore, this spinlock is defined to ensure thread
+ * safe access of the _xt_coproc_owner_sa array.
+ */
+#if portNUM_PROCESSORS > 1
+    .global _xt_coproc_owner_sa_lock
+    .type   _xt_coproc_owner_sa_lock,@object
+    .align  16                      /* minimize crossing cache boundaries */
+_xt_coproc_owner_sa_lock:
+    .space  4
+#endif /* portNUM_PROCESSORS > 1 */
 
+    .section .iram1,"ax"
     .align  4
 .L_goto_invalid:
     j   .L_xt_coproc_invalid    /* not in a thread (invalid) */
@@ -907,29 +918,15 @@ _xt_coproc_exc:
     s32i    a4,  sp, XT_STK_A4
     s32i    a15, sp, XT_STK_A15
 
+    /* Call the RTOS coprocessor exception hook */
+    call0   XT_RTOS_CP_EXC_HOOK
+
     /* Get co-processor state save area of new owner thread. */
     call0   XT_RTOS_CP_STATE                /* a15 = new owner's save area */
-    #if CONFIG_FREERTOS_FPU_IN_ISR
-    beqz    a15, .L_skip_core_pin           /* CP used in ISR, skip task pinning */
-    #else
+    #if !CONFIG_FREERTOS_FPU_IN_ISR
     beqz    a15, .L_goto_invalid            /* not in a thread (invalid) */
     #endif
 
-#if configNUM_CORES > 1
-    /* CP operations are incompatible with unpinned tasks. Thus we pin the task
-    to the current running core. */
-    movi    a2, pxCurrentTCB
-    getcoreid a3                            /* a3 = current core ID */
-    addx4   a2, a3, a2
-    l32i    a2, a2, 0                       /* a2 = start of pxCurrentTCB[cpuid] */
-    addi    a2, a2, TASKTCB_XCOREID_OFFSET  /* a2 = &TCB.xCoreID */
-    s32i    a3, a2, 0                       /* TCB.xCoreID = current core ID */
-#endif // configNUM_CORES > 1
-
-#if CONFIG_FREERTOS_FPU_IN_ISR
-.L_skip_core_pin:
-#endif
-
     /* Enable the co-processor's bit in CPENABLE. */
     movi    a0, _xt_coproc_mask
     rsr     a4, CPENABLE                    /* a4 = CPENABLE */
@@ -939,16 +936,18 @@ _xt_coproc_exc:
     or      a4, a4, a2                      /* a4 = CPENABLE | (1 << n) */
     wsr     a4, CPENABLE
 
-/*
-Keep loading _xt_coproc_owner_sa[n] atomic (=load once, then use that value
-everywhere): _xt_coproc_release assumes it works like this in order not to need
-locking.
-*/
-    /* Grab correct xt_coproc_owner_sa for this core */
-    movi    a2, XCHAL_CP_MAX << 2
-    mull    a2, a2, a3                      /* multiply by current processor id */
-    movi    a3, _xt_coproc_owner_sa         /* a3 = base of owner array */
-    add     a3, a3, a2                      /* a3 = owner area needed for this processor */
+    /* Grab the xt_coproc_owner_sa owner array for current core */
+    getcoreid a3                            /* a3 = current core ID */
+    movi    a2, XCHAL_CP_MAX << 2           /* a2 = size of an owner array */
+    mull    a2, a2, a3                      /* a2 = offset to the owner array of the current core */
+    movi    a3, _xt_coproc_owner_sa         /* a3 = base of all owner arrays */
+    add     a3, a3, a2                      /* a3 = base of owner array of the current core */
+
+#if portNUM_PROCESSORS > 1
+    /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock
+     * to ensure thread safe access of _xt_coproc_owner_sa between cores. */
+    spinlock_take a0 a2 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
 
     /* Get old coprocessor owner thread (save area ptr) and assign new one.  */
     addx4   a3,  a5, a3                      /* a3 = &_xt_coproc_owner_sa[n] */
@@ -956,12 +955,18 @@ locking.
     s32i    a15, a3, 0                       /* _xt_coproc_owner_sa[n] = new */
     rsync                                    /* ensure wsr.CPENABLE is complete */
 
+#if portNUM_PROCESSORS > 1
+    /* Release previously taken spinlock */
+    spinlock_release a0 a2 _xt_coproc_owner_sa_lock
+#endif /* portNUM_PROCESSORS > 1 */
+
     /* Only need to context switch if new owner != old owner. */
     /* If float is necessary on ISR, we need to remove this check */
     /* below, because on restoring from ISR we may have new == old condition used
      * to force cp restore to next thread
+     * Todo: IDF-6418
      */
-    #ifndef CONFIG_FREERTOS_FPU_IN_ISR
+    #if !CONFIG_FREERTOS_FPU_IN_ISR
     bne     a15, a2, .L_switch_context
     j    .L_goto_done                       /* new owner == old, we're done */
 .L_switch_context:

+ 52 - 56
components/freertos/FreeRTOS-Kernel/tasks.c

@@ -1415,18 +1415,8 @@ static void prvAddNewTaskToReadyList( TCB_t * pxNewTCB )
              * not return. */
             uxTaskNumber++;
 
-            /*
-             * We cannot immediately a task that is
-             * - Currently running on either core
-             * - If the task is not currently running but is pinned to the other (due to FPU cleanup)
-             * Todo: Allow deletion of tasks pinned to other core (IDF-5803)
-             */
-            #if ( configNUM_CORES > 1 )
-                xFreeNow = ( taskIS_CURRENTLY_RUNNING( pxTCB ) || ( pxTCB->xCoreID == !xCurCoreID ) ) ? pdFALSE : pdTRUE;
-            #else
-                xFreeNow = ( taskIS_CURRENTLY_RUNNING( pxTCB ) ) ? pdFALSE : pdTRUE;
-            #endif /* configNUM_CORES > 1 */
-
+            /* We cannot free the task immediately if it is currently running (on either core) */
+            xFreeNow = ( taskIS_CURRENTLY_RUNNING( pxTCB ) ) ? pdFALSE : pdTRUE;
             if( xFreeNow == pdFALSE )
             {
                 /* A task is deleting itself.  This cannot complete within the
@@ -1455,7 +1445,7 @@ static void prvAddNewTaskToReadyList( TCB_t * pxNewTCB )
                 #if ( configNUM_CORES > 1 )
                     if( taskIS_CURRENTLY_RUNNING_ON_CORE( pxTCB, !xCurCoreID ) )
                     {
-                        /* SMP case of deleting a task running on a different core. Same issue
+                        /* SMP case of deleting a task currently running on a different core. Same issue
                          * as a task deleting itself, but we need to send a yield to this task now
                          * before we release xKernelLock.
                          *
@@ -4505,71 +4495,73 @@ static void prvInitialiseTaskLists( void )
 }
 /*-----------------------------------------------------------*/
 
+
 static void prvCheckTasksWaitingTermination( void )
 {
     /** THIS FUNCTION IS CALLED FROM THE RTOS IDLE TASK **/
 
     #if ( INCLUDE_vTaskDelete == 1 )
         {
-            BaseType_t xListIsEmpty;
-            BaseType_t core = xPortGetCoreID();
+            TCB_t * pxTCB;
 
-            /* uxDeletedTasksWaitingCleanUp is used to prevent taskENTER_CRITICAL( &xKernelLock )
+            /* uxDeletedTasksWaitingCleanUp is used to prevent taskENTER_CRITICAL()
              * being called too often in the idle task. */
             while( uxDeletedTasksWaitingCleanUp > ( UBaseType_t ) 0U )
             {
-                TCB_t * pxTCB = NULL;
-
-                taskENTER_CRITICAL( &xKernelLock );
-                {
-                    xListIsEmpty = listLIST_IS_EMPTY( &xTasksWaitingTermination );
-
-                    if( xListIsEmpty == pdFALSE )
+                #if ( configNUM_CORES > 1 )
+                    pxTCB = NULL;
+                    taskENTER_CRITICAL( &xKernelLock );
                     {
-                        /* We only want to kill tasks that ran on this core because e.g. _xt_coproc_release needs to
-                         * be called on the core the process is pinned on, if any */
-                        ListItem_t * target = listGET_HEAD_ENTRY( &xTasksWaitingTermination );
-
-                        for( ; target != listGET_END_MARKER( &xTasksWaitingTermination ); target = listGET_NEXT( target ) ) /*Walk the list */
+                        /* List may have already been cleared by the other core. Check again */
+                        if ( listLIST_IS_EMPTY( &xTasksWaitingTermination ) == pdFALSE )
                         {
-                            TCB_t * tgt_tcb = ( TCB_t * ) listGET_LIST_ITEM_OWNER( target );
-                            int affinity = tgt_tcb->xCoreID;
-
-                            /*Self deleting tasks are added to Termination List before they switch context. Ensure they aren't still currently running */
-                            if( ( pxCurrentTCB[ core ] == tgt_tcb ) || ( ( configNUM_CORES > 1 ) && ( pxCurrentTCB[ !core ] == tgt_tcb ) ) )
-                            {
-                                continue; /*Can't free memory of task that is still running */
-                            }
-
-                            if( ( affinity == core ) || ( affinity == tskNO_AFFINITY ) ) /*Find first item not pinned to other core */
+                            /* We can't delete a task if it is still running on
+                             * the other core. Keep walking the list until we
+                             * find a task we can free, or until we walk the
+                             * entire list. */
+                            ListItem_t *xEntry;
+                            for ( xEntry = listGET_HEAD_ENTRY( &xTasksWaitingTermination ); xEntry != listGET_END_MARKER( &xTasksWaitingTermination ); xEntry = listGET_NEXT( xEntry ) )
                             {
-                                pxTCB = tgt_tcb;
-                                break;
+                                if ( !taskIS_CURRENTLY_RUNNING( ( ( TCB_t * ) listGET_LIST_ITEM_OWNER( xEntry ) ) ) )
+                                {
+                                    pxTCB = ( TCB_t * ) listGET_LIST_ITEM_OWNER( xEntry );
+                                    ( void ) uxListRemove( &( pxTCB->xStateListItem ) );
+                                    --uxCurrentNumberOfTasks;
+                                    --uxDeletedTasksWaitingCleanUp;
+                                    break;
+                                }
                             }
                         }
+                    }
+                    taskEXIT_CRITICAL( &xKernelLock );
 
-                        if( pxTCB != NULL )
-                        {
-                            ( void ) uxListRemove( target ); /*Remove list item from list */
-                            --uxCurrentNumberOfTasks;
-                            --uxDeletedTasksWaitingCleanUp;
-                        }
+                    if ( pxTCB != NULL )
+                    {
+                        #if ( configNUM_THREAD_LOCAL_STORAGE_POINTERS > 0 ) && ( configTHREAD_LOCAL_STORAGE_DELETE_CALLBACKS == 1 )
+                            prvDeleteTLS( pxTCB );
+                        #endif
+                        prvDeleteTCB( pxTCB );
                     }
-                }
-                taskEXIT_CRITICAL( &xKernelLock ); /*Need to call deletion callbacks outside critical section */
+                    else
+                    {
+                        /* No task found to delete, break out of loop */
+                        break;
+                    }
+                #else
+                    taskENTER_CRITICAL( &xKernelLock );
+                    {
+                        pxTCB = listGET_OWNER_OF_HEAD_ENTRY( ( &xTasksWaitingTermination ) ); /*lint !e9079 void * is used as this macro is used with timers and co-routines too.  Alignment is known to be fine as the type of the pointer stored and retrieved is the same. */
+                        ( void ) uxListRemove( &( pxTCB->xStateListItem ) );
+                        --uxCurrentNumberOfTasks;
+                        --uxDeletedTasksWaitingCleanUp;
+                    }
+                    taskEXIT_CRITICAL( &xKernelLock );
 
-                if( pxTCB != NULL )                /*Call deletion callbacks and free TCB memory */
-                {
                     #if ( configNUM_THREAD_LOCAL_STORAGE_POINTERS > 0 ) && ( configTHREAD_LOCAL_STORAGE_DELETE_CALLBACKS == 1 )
                         prvDeleteTLS( pxTCB );
                     #endif
                     prvDeleteTCB( pxTCB );
-                }
-                else
-                {
-                    mtCOVERAGE_TEST_MARKER();
-                    break; /*No TCB found that could be freed by this core, break out of loop */
-                }
+                #endif  /* configNUM_CORES > 1 */
             }
         }
     #endif /* INCLUDE_vTaskDelete */
@@ -4848,6 +4840,10 @@ BaseType_t xTaskGetAffinity( TaskHandle_t xTask )
             vPortReleaseTaskMPUSettings( &( pxTCB->xMPUSettings ) );
         #endif
 
+        #ifdef portCLEAN_UP_COPROC
+            portCLEAN_UP_COPROC( ( void * ) pxTCB );
+        #endif
+
         #if ( ( configSUPPORT_DYNAMIC_ALLOCATION == 1 ) && ( configSUPPORT_STATIC_ALLOCATION == 0 ) && ( portUSING_MPU_WRAPPERS == 0 ) )
             {
                 /* The task can only have been allocated dynamically - free both

+ 1 - 1
components/freertos/esp_additions/include/freertos/FreeRTOSConfig.h

@@ -171,7 +171,7 @@ This file get's pulled into assembly sources. Therefore, some includes need to b
 #elif CONFIG_FREERTOS_CHECK_STACKOVERFLOW_CANARY
 #define configCHECK_FOR_STACK_OVERFLOW                  2
 #endif
-#define configRECORD_STACK_HIGH_ADDRESS                 1
+#define configRECORD_STACK_HIGH_ADDRESS                 1   // This must be set as the port requires TCB.pxEndOfStack
 
 // ------------------- Run-time Stats ----------------------
 

+ 1 - 4
components/freertos/linker.lf

@@ -13,10 +13,7 @@ entries:
         port: pxPortInitialiseStack (default)
         port: xPortStartScheduler (default)
         if IDF_TARGET_ESP32 = y || IDF_TARGET_ESP32S3 = y :
-            port: vPortReleaseTaskMPUSettings (default)
-            tasks: xTaskCreateRestricted (default)
-            port: vPortStoreTaskMPUSettings (default)
-            tasks: vTaskAllocateMPURegions (default)
+            port: vPortCleanUpCoprocArea (default)
         tasks: prvTaskCheckFreeStackSpace (default)
         tasks: prvInitialiseNewTask (default)
         tasks: prvInitialiseTaskLists (default)

+ 41 - 25
components/freertos/test_apps/freertos/port/test_fpu_in_task.c

@@ -23,16 +23,21 @@ Test FPU usage from a task context
 Purpose:
     - Test that the FPU can be used from a task context
     - Test that FPU context is properly saved and restored
+    - Test that FPU context is cleaned up on task deletion by running multiple iterations
 Procedure:
     - Create TEST_PINNED_NUM_TASKS tasks pinned to each core
     - Start each task
    - Each task updates a float variable and then blocks (to allow other tasks to run thus forcing an FPU context
       save and restore).
+    - Delete each task
+    - Repeat test for TEST_PINNED_NUM_ITERS iterations
 Expected:
     - Correct float value calculated by each task
+    - Each task cleans up its FPU context on deletion
 */
 
 #define TEST_PINNED_NUM_TASKS       3
+#define TEST_PINNED_NUM_ITERS       5
 
 static void pinned_task(void *arg)
 {
@@ -62,35 +67,38 @@ TEST_CASE("FPU: Usage in task", "[freertos]")
     SemaphoreHandle_t done_sem = xSemaphoreCreateCounting(configNUM_CORES * TEST_PINNED_NUM_TASKS, 0);
     TEST_ASSERT_NOT_EQUAL(NULL, done_sem);
 
-    TaskHandle_t task_handles[configNUM_CORES][TEST_PINNED_NUM_TASKS];
+    for (int iter = 0; iter < TEST_PINNED_NUM_ITERS; iter++) {
+        TaskHandle_t task_handles[configNUM_CORES][TEST_PINNED_NUM_TASKS];
 
-    // Create test tasks for each core
-    for (int i = 0; i < configNUM_CORES; i++) {
-        for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
-            TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *)done_sem, UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i));
+        // Create test tasks for each core
+        for (int i = 0; i < configNUM_CORES; i++) {
+            for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
+                TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *)done_sem, UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i));
+            }
         }
-    }
 
-    // Start the created tasks simultaneously
-    for (int i = 0; i < configNUM_CORES; i++) {
-        for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
-            xTaskNotifyGive(task_handles[i][j]);
+        // Start the created tasks simultaneously
+        for (int i = 0; i < configNUM_CORES; i++) {
+            for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
+                xTaskNotifyGive(task_handles[i][j]);
+            }
         }
-    }
 
-    // Wait for the tasks to complete
-    for (int i = 0; i < configNUM_CORES * TEST_PINNED_NUM_TASKS; i++) {
-        xSemaphoreTake(done_sem, portMAX_DELAY);
-    }
+        // Wait for the tasks to complete
+        for (int i = 0; i < configNUM_CORES * TEST_PINNED_NUM_TASKS; i++) {
+            xSemaphoreTake(done_sem, portMAX_DELAY);
+        }
 
-    // Delete the tasks
-    for (int i = 0; i < configNUM_CORES; i++) {
-        for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
-            vTaskDelete(task_handles[i][j]);
+        // Delete the tasks
+        for (int i = 0; i < configNUM_CORES; i++) {
+            for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
+                vTaskDelete(task_handles[i][j]);
+            }
         }
+
+        vTaskDelay(10); // Short delay to allow the idle task to free task memory and FPU contexts
     }
 
-    vTaskDelay(10); // Short delay to allow idle task to be free task memory and FPU contexts
     vSemaphoreDelete(done_sem);
 }
 
@@ -101,18 +109,24 @@ Test FPU usage will pin an unpinned task
 
 Purpose:
     - Test that unpinned tasks are automatically pinned to the current core on the task's first use of the FPU
+    - Test that FPU context is cleaned up on task deletion by running multiple iterations
 Procedure:
     - Create an unpinned task
     - Task disables scheduling/preemption to ensure that it does not switch cores
     - Task uses the FPU
     - Task checks its core affinity after FPU usage
+    - Task deletes itself
+    - Repeat test for TEST_UNPINNED_NUM_ITERS iterations
 Expected:
     - Task remains unpinned until its first usage of the FPU
     - The task becomes pinned to the current core after first use of the FPU
+    - Each task cleans up its FPU context on deletion
 */
 
 #if configNUM_CORES > 1
 
+#define TEST_UNPINNED_NUM_ITERS     5
+
 static void unpinned_task(void *arg)
 {
     // Disable scheduling/preemption to make sure current core ID doesn't change
@@ -162,11 +176,13 @@ static void unpinned_task(void *arg)
 TEST_CASE("FPU: Usage in unpinned task", "[freertos]")
 {
     TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
-    // Create unpinned task
-    xTaskCreate(unpinned_task, "unpin", 4096, (void *)unity_task_handle, UNITY_FREERTOS_PRIORITY + 1, NULL);
-    // Wait for task to complete
-    ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
-    vTaskDelay(10); // Short delay to allow task memory to be freed
+    for (int iter = 0; iter < TEST_UNPINNED_NUM_ITERS; iter++) {
+        // Create unpinned task
+        xTaskCreate(unpinned_task, "unpin", 4096, (void *)unity_task_handle, UNITY_FREERTOS_PRIORITY + 1, NULL);
+        // Wait for task to complete
+        ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
+        vTaskDelay(10); // Short delay to allow task memory to be freed
+    }
 }
 
 #endif // configNUM_CORES > 1

+ 2 - 0
docs/doxygen-known-warnings.txt

@@ -1,3 +1,5 @@
 semphr.h:line: warning: argument 'pxStaticSemaphore' of command @param is not found in the argument list of xSemaphoreCreateCounting(uxMaxCount, uxInitialCount)
 task.h:line: warning: argument 'pxTaskDefinition' of command @param is not found in the argument list of vTaskAllocateMPURegions(TaskHandle_t xTask, const MemoryRegion_t *const pxRegions)
 task.h:line: warning: argument 'pxCreatedTask' of command @param is not found in the argument list of vTaskAllocateMPURegions(TaskHandle_t xTask, const MemoryRegion_t *const pxRegions)
+task.h:line: warning: argument 'pxTaskDefinition' of command @param is not found in the argument list of vTaskAllocateMPURegions(TaskHandle_t xTask, const MemoryRegion_t *const pxRegions)
+task.h:line: warning: argument 'pxCreatedTask' of command @param is not found in the argument list of vTaskAllocateMPURegions(TaskHandle_t xTask, const MemoryRegion_t *const pxRegions)

+ 0 - 1
docs/doxygen/Doxyfile

@@ -303,7 +303,6 @@ PREDEFINED             = \
     configNUM_THREAD_LOCAL_STORAGE_POINTERS=1 \
     configUSE_APPLICATION_TASK_TAG=1 \
     configTASKLIST_INCLUDE_COREID=1 \
-    portUSING_MPU_WRAPPERS=1 \
     PRIVILEGED_FUNCTION= \
     "ESP_EVENT_DECLARE_BASE(x)=extern esp_event_base_t x"
 

+ 1 - 3
tools/test_idf_monitor/Makefile

@@ -7,9 +7,7 @@ PREFIX_RISCV ?= riscv32-esp-elf-
 PROG_XTENSA := dummy_xtensa.elf
 PROG_RISCV := dummy_riscv.elf
 
-# This actually depends on the value of portUSING_MPU_WRAPPERS.
-# I.e. ESP32-S2 would also have TASK_NAME_OFFSET=52 since portUSING_MPU_WRAPPERS is 0.
-CPPFLAGS_XTENSA := -DTASK_NAME_OFFSET=56
+CPPFLAGS_XTENSA := -DTASK_NAME_OFFSET=52
 CPPFLAGS_RISCV := -DTASK_NAME_OFFSET=52
 
 all: $(PROG_XTENSA) $(PROG_RISCV)