Эх сурвалжийг харах

Merge branch 'feature/p4_multi_core_wdt' into 'master'

WDT: Multi-core support for RISC-V WDT

Closes IDF-6516 and IDF-8136

See merge request espressif/esp-idf!25689
Marius Vikhammer 2 жил өмнө
parent
commit
88af378d2e

+ 8 - 10
components/esp_system/crosscore_int.c

@@ -30,12 +30,10 @@
 
 #define REASON_YIELD            BIT(0)
 #define REASON_FREQ_SWITCH      BIT(1)
-#define REASON_GDB_CALL         BIT(3)
-
-#if CONFIG_IDF_TARGET_ARCH_XTENSA
 #define REASON_PRINT_BACKTRACE  BIT(2)
+#define REASON_GDB_CALL         BIT(3)
 #define REASON_TWDT_ABORT       BIT(4)
-#endif
+
 
 static portMUX_TYPE reason_spinlock = portMUX_INITIALIZER_UNLOCKED;
 static volatile uint32_t reason[portNUM_PROCESSORS];
@@ -100,20 +98,22 @@ static void IRAM_ATTR esp_crosscore_isr(void *arg) {
         update_breakpoints();
     }
 #endif // !CONFIG_ESP_SYSTEM_GDBSTUB_RUNTIME
-#if CONFIG_IDF_TARGET_ARCH_XTENSA // IDF-2986
+
     if (my_reason_val & REASON_PRINT_BACKTRACE) {
         esp_backtrace_print(100);
     }
 
+
+
 #if CONFIG_ESP_TASK_WDT_EN
     if (my_reason_val & REASON_TWDT_ABORT) {
-        extern void task_wdt_timeout_abort_xtensa(bool);
+        extern void task_wdt_timeout_abort(bool);
         /* Called from a crosscore interrupt, thus, we are not the core that received
          * the TWDT interrupt, call the function with `false` as a parameter. */
-        task_wdt_timeout_abort_xtensa(false);
+        task_wdt_timeout_abort(false);
     }
 #endif // CONFIG_ESP_TASK_WDT_EN
-#endif // CONFIG_IDF_TARGET_ARCH_XTENSA
+
 }
 
 //Initialize the crosscore interrupt on this core. Call this once
@@ -182,7 +182,6 @@ void IRAM_ATTR esp_crosscore_int_send_gdb_call(int core_id)
     esp_crosscore_int_send(core_id, REASON_GDB_CALL);
 }
 
-#if CONFIG_IDF_TARGET_ARCH_XTENSA
 void IRAM_ATTR esp_crosscore_int_send_print_backtrace(int core_id)
 {
     esp_crosscore_int_send(core_id, REASON_PRINT_BACKTRACE);
@@ -193,4 +192,3 @@ void IRAM_ATTR esp_crosscore_int_send_twdt_abort(int core_id) {
     esp_crosscore_int_send(core_id, REASON_TWDT_ABORT);
 }
 #endif // CONFIG_ESP_TASK_WDT_EN
-#endif

+ 5 - 0
components/esp_system/include/esp_debug_helpers.h

@@ -103,6 +103,11 @@ esp_err_t esp_backtrace_print_from_frame(int depth, const esp_backtrace_frame_t*
  *
  * @param depth The maximum number of stack frames to print (should be > 0)
  *
+ * @note On RISC-V targets printing backtrace at run-time is only available if
+ *       CONFIG_ESP_SYSTEM_USE_EH_FRAME is selected. Otherwise we simply print
+ *       a register dump. Function assumes it is called in a context where the
+ *       calling task will not migrate to another core, e.g. interrupts disabled/panic handler.
+ *
  * @return
  *      - ESP_OK    Backtrace successfully printed to completion or to depth limit
  *      - ESP_FAIL  Backtrace is corrupted

+ 1 - 0
components/esp_system/port/CMakeLists.txt

@@ -41,6 +41,7 @@ if(CONFIG_IDF_TARGET_ARCH_XTENSA)
 elseif(CONFIG_IDF_TARGET_ARCH_RISCV)
     list(APPEND srcs "arch/riscv/expression_with_stack.c"
                      "arch/riscv/panic_arch.c"
+                     "arch/riscv/debug_helpers.c"
                      "arch/riscv/debug_stubs.c")
 endif()
 

+ 60 - 0
components/esp_system/port/arch/riscv/debug_helpers.c

@@ -0,0 +1,60 @@
+/*
+ * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "esp_debug_helpers.h"
+#include "sdkconfig.h"
+#include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "esp_private/freertos_debug.h"
+#include "esp_err.h"
+#include "esp_attr.h"
+#include "riscv/rvruntime-frames.h"
+
+#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
+#include "esp_private/eh_frame_parser.h"
+#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
+
+#if !CONFIG_ESP_SYSTEM_USE_EH_FRAME
+/* Function used to print all the registers pointed by the given frame .*/
+extern void panic_print_registers(const void *frame, int core);
+#endif // !CONFIG_ESP_SYSTEM_USE_EH_FRAME
+
+/* Targets based on a RISC-V CPU cannot perform backtracing that easily.
+    * We have two options here:
+    *     - Perform backtracing at runtime.
+    *     - Let IDF monitor do the backtracing for us. Used during panic already.
+    * This could be configurable, choosing one or the other depending on
+    * CONFIG_ESP_SYSTEM_USE_EH_FRAME configuration option.
+    *
+    * In both cases, this takes time, and we might be in an ISR, we must
+    * exit this handler as fast as possible, then we will simply print
+    * the interruptee's registers.
+    */
+esp_err_t IRAM_ATTR esp_backtrace_print(int depth)
+{
+    (void)depth;
+
+    const int current_core = xPortGetCoreID();
+
+    TaskSnapshot_t snapshot = { 0 };
+    BaseType_t ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandleForCPU(current_core), &snapshot);
+
+    if (ret != pdTRUE) {
+        return ESP_ERR_NOT_FOUND;
+    }
+
+    void *frame = snapshot.pxTopOfStack;
+
+#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
+        esp_rom_printf("Print CPU %d (current core) backtrace\n", current_core);
+        esp_eh_frame_print_backtrace(frame);
+#else // CONFIG_ESP_SYSTEM_USE_EH_FRAME
+        esp_rom_printf("Print CPU %d (current core) registers\n", current_core);
+        panic_print_registers(frame, current_core);
+        esp_rom_printf("\r\n");
+#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
+
+    return ESP_OK;
+}

+ 13 - 16
components/esp_system/port/arch/riscv/panic_arch.c

@@ -13,6 +13,7 @@
 #else
 #include "soc/extmem_reg.h"
 #endif
+#include "soc/soc_caps.h"
 #include "esp_private/panic_internal.h"
 #include "esp_private/panic_reason.h"
 #include "riscv/rvruntime-frames.h"
@@ -276,8 +277,6 @@ static void panic_print_register_array(const char* names[], const uint32_t* regs
 
 void panic_print_registers(const void *f, int core)
 {
-    const RvExcFrame *frame = (RvExcFrame *)f;
-
     /**
      * General Purpose context, only print ABI name
      */
@@ -290,7 +289,7 @@ void panic_print_registers(const void *f, int core)
     };
 
     panic_print_str("Core ");
-    panic_print_dec(frame->mhartid);
+    panic_print_dec(core);
     panic_print_str(" register dump:");
     panic_print_register_array(desc, f, DIM(desc));
 }
@@ -319,23 +318,21 @@ void panic_soc_fill_info(void *f, panic_info_t *info)
         info->reason = "Cache error";
         info->details = print_cache_err_details;
 
-    } else if (frame->mcause == ETS_INT_WDT_INUM) {
-        /* Watchdog interrupt occured, get the core on which it happened
-         * and update the reason/message accordingly. */
-
-        const int core = esp_cache_err_get_cpuid();
+    } else if (frame->mcause == PANIC_RSN_INTWDT_CPU0) {
+        const int core = 0;
         info->core = core;
         info->exception = PANIC_EXCEPTION_IWDT;
-
-#if SOC_CPU_NUM > 1
-#error "TODO: define PANIC_RSN_INTWDT_CPU1 in panic_reason.h"
-        _Static_assert(PANIC_RSN_INTWDT_CPU0 + 1 == PANIC_RSN_INTWDT_CPU1,
-                       "PANIC_RSN_INTWDT_CPU1 must be equal to PANIC_RSN_INTWDT_CPU0 + 1");
-        info->reason = core == 0 ? "Interrupt wdt timeout on CPU0" : "Interrupt wdt timeout on CPU1";
-#else
         info->reason = "Interrupt wdt timeout on CPU0";
-#endif
     }
+#if SOC_CPU_CORES_NUM > 1
+    else if (frame->mcause == PANIC_RSN_INTWDT_CPU1) {
+        const int core = 1;
+        info->core = core;
+        info->exception = PANIC_EXCEPTION_IWDT;
+        info->reason = "Interrupt wdt timeout on CPU1";
+    }
+#endif
+
 #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
     else if (frame->mcause == ETS_ASSIST_DEBUG_INUM) {
         info->core = esp_cache_err_get_cpuid();

+ 17 - 66
components/esp_system/task_wdt/task_wdt.c

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -24,6 +24,12 @@
 #include "esp_private/esp_task_wdt.h"
 #include "esp_private/esp_task_wdt_impl.h"
 
+
+#if CONFIG_IDF_TARGET_ARCH_RISCV
+#include "riscv/rvruntime-frames.h"
+#endif //CONFIG_IDF_TARGET_ARCH_RISCV
+
+
 #if CONFIG_ESP_SYSTEM_USE_EH_FRAME
 #include "esp_private/eh_frame_parser.h"
 #endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
@@ -331,65 +337,6 @@ static void subscribe_idle(uint32_t core_mask)
  *
  */
 
-#if CONFIG_IDF_TARGET_ARCH_RISCV
-
-static void task_wdt_timeout_handling(int cores_fail, bool panic)
-{
-    /* For RISC-V, make sure the cores that fail is only composed of core 0. */
-    assert(cores_fail == BIT(0));
-
-    const int current_core = 0;
-    TaskSnapshot_t snapshot = { 0 };
-    BaseType_t ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot);
-
-    if (p_twdt_obj->panic) {
-        assert(ret == pdTRUE);
-        ESP_EARLY_LOGE(TAG, "Aborting.");
-        esp_reset_reason_set_hint(ESP_RST_TASK_WDT);
-        /**
-         * We cannot simply use `abort` here because the `panic` handler would
-         * interpret it as if the task watchdog ISR aborted and so, print this
-         * current ISR backtrace/context. We want to trick the `panic` handler
-         * to think the task itself is aborting.
-         * To do so, we need to get the interruptee's top of the stack. It contains
-         * its own context, saved when the interrupt occurred.
-         * We must also set the global flag that states that an abort occurred
-         * (and not a panic)
-         **/
-        g_panic_abort = true;
-        g_twdt_isr = true;
-        void *frame = (void *) snapshot.pxTopOfStack;
-#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
-        ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core);
-#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
-        xt_unhandled_exception(frame);
-    } else {
-        /* Targets based on a RISC-V CPU cannot perform backtracing that easily.
-         * We have two options here:
-         *     - Perform backtracing at runtime.
-         *     - Let IDF monitor do the backtracing for us. Used during panic already.
-         * This could be configurable, choosing one or the other depending on
-         * CONFIG_ESP_SYSTEM_USE_EH_FRAME configuration option.
-         *
-         * In both cases, this takes time, and we are in an ISR, we must
-         * exit this handler as fast as possible, then we will simply print
-         * the interruptee's registers.
-         */
-        if (ret == pdTRUE) {
-            void *frame = (void *) snapshot.pxTopOfStack;
-#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
-            ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core);
-            esp_eh_frame_print_backtrace(frame);
-#else // CONFIG_ESP_SYSTEM_USE_EH_FRAME
-            ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) registers", current_core);
-            panic_print_registers(frame, current_core);
-            esp_rom_printf("\r\n");
-#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
-        }
-    }
-}
-
-#else // CONFIG_IDF_TARGET_ARCH_RISCV
 
 /**
  * Function simulating an abort coming from the interrupted task of the current
@@ -398,7 +345,7 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic)
  * in the case where the other core (than the main one) has to abort because one
  * of his tasks didn't reset the TWDT on time.
  */
-void task_wdt_timeout_abort_xtensa(bool current_core)
+void task_wdt_timeout_abort(bool current_core)
 {
     TaskSnapshot_t snapshot = { 0 };
     BaseType_t ret = pdTRUE;
@@ -408,26 +355,32 @@ void task_wdt_timeout_abort_xtensa(bool current_core)
     ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot);
     assert(ret == pdTRUE);
     g_panic_abort = true;
-    /* For Xtensa, we should set this flag as late as possible, as this function may
+    /* We should set this flag as late as possible, as this function may
      * be called after a crosscore interrupt. Indeed, a higher interrupt may occur
      * after calling the crosscore interrupt, if its handler fails, this flag
      * shall not be set.
      * This flag will tell the coredump component (if activated) that yes, we are in
      * an ISR context, but it is intended, it is not because an ISR encountered an
-     * exception. If we don't set such flag, later tested by coredump, the later would
+     * exception. If we don't set such flag, later tested by coredump, the latter would
      * switch the execution frame/context we are giving it to the interrupt stack.
      * For details about this behavior in the TODO task: IDF-5694
      */
     g_twdt_isr = true;
     void *frame = (void *) snapshot.pxTopOfStack;
+
+#if CONFIG_ESP_SYSTEM_USE_EH_FRAME | CONFIG_IDF_TARGET_ARCH_XTENSA
     if (current_core) {
         ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", xPortGetCoreID());
     } else {
         ESP_EARLY_LOGE(TAG, "Print CPU %d backtrace", xPortGetCoreID());
     }
+#endif
+
     xt_unhandled_exception(frame);
 }
 
+
+
 static void task_wdt_timeout_handling(int cores_fail, bool panic)
 {
     const int current_core = xPortGetCoreID();
@@ -453,7 +406,7 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic)
         }
 #endif // !CONFIG_FREERTOS_UNICORE
         /* Current core is failing, abort right now */
-        task_wdt_timeout_abort_xtensa(true);
+        task_wdt_timeout_abort(true);
     } else {
         /* Print backtrace of the core that failed to reset the watchdog */
         if (cores_fail & BIT(current_core)) {
@@ -470,8 +423,6 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic)
     }
 }
 
-#endif // CONFIG_IDF_TARGET_ARCH_RISCV
-
 
 // ---------------------- Callbacks ------------------------
 

+ 8 - 6
components/riscv/include/esp_private/panic_reason.h

@@ -5,14 +5,16 @@
  */
 #pragma once
 
-/* Since riscv does not replace mcause with "pseudo_reason" as it xtensa does
- * PANIC_RSN_* defined with original interrupt numbers to make it work in
- * common code
+#include "soc/soc_caps.h"
+
+/* Need a way to signal which core caused the INT WDT like we do with EXCAUSE on xtensa.
+   Choosing a large number that is unlikely to conflict with any actual riscv mcauses
+   bit 12 and above are always zero on the CPU used by P4
  */
 #define PANIC_RSN_INTWDT_CPU0 ETS_INT_WDT_INUM
-
-//TODO: IDF-7511
 #if SOC_CPU_CORES_NUM > 1
-#define PANIC_RSN_INTWDT_CPU1 ETS_INT_WDT_INUM
+#define PANIC_RSN_INTWDT_CPU1_FLAG (1 << 12)
+#define PANIC_RSN_INTWDT_CPU1 (PANIC_RSN_INTWDT_CPU1_FLAG | ETS_INT_WDT_INUM)
 #endif
+
 #define PANIC_RSN_CACHEERR    3

+ 14 - 0
components/riscv/vectors.S

@@ -10,6 +10,7 @@
 #include "soc/soc_caps.h"
 #include "sdkconfig.h"
 #include "esp_private/vectors_const.h"
+#include "esp_private/panic_reason.h"
 
 
     .equ SAVE_REGS, 32
@@ -224,6 +225,19 @@ _call_panic_handler:
     /* When CLIC is supported, external interrupts are shifted by 16, deduct this difference from mcause */
     add   a1, a1, -16
 #endif // CONFIG_SOC_INT_CLIC_SUPPORTED
+
+#if CONFIG_ESP_INT_WDT_CHECK_CPU1
+    /* Check if this was a INT WDT */
+    li t0, PANIC_RSN_INTWDT_CPU0
+    bne a1, t0, _store_mcause
+    /* Check if the cause is the app cpu failing to tick, if so then update mcause to reflect this*/
+    lw t0, int_wdt_cpu1_ticked
+    bnez t0, _store_mcause
+    li t0, PANIC_RSN_INTWDT_CPU1_FLAG
+    add a1, a1, t0
+#endif
+
+_store_mcause:
     sw    a1, RV_STK_MCAUSE(sp)
     call  panic_from_isr
     /* We arrive here if the exception handler has returned. This means that