Эх сурвалжийг харах

Make CPU1 int wdt / idle task wdt configurable, panic now properly disables other cpu, tick handler now also is called on cpu1, task wdt prints currently running tasks.

Jeroen Domburg 9 жил өмнө
parent
commit
89f7752cdd

+ 15 - 1
components/esp32/Kconfig

@@ -158,6 +158,13 @@ config INT_WDT_TIMEOUT_MS
     help
         The timeout of the watchdog, in milliseconds. Make this higher than the FreeRTOS tick period.
 
+config INT_WDT_CHECK_CPU1
+    bool "Also watch CPU1 tick interrupt"
+    depends on INT_WDT && !FREERTOS_UNICORE
+    default y
+    help
+        Also detect if interrupts on CPU 1 are disabled for too long.
+
 config TASK_WDT
     bool "Task watchdog"
     default y
@@ -182,7 +189,7 @@ config TASK_WDT_TIMEOUT_S
         Timeout for the task WDT, in seconds.
 
 config TASK_WDT_CHECK_IDLE_TASK
-    bool "Task watchdog watches idle tasks"
+    bool "Task watchdog watches CPU0 idle task"
     depends on TASK_WDT
     default y
     help
@@ -192,6 +199,13 @@ config TASK_WDT_CHECK_IDLE_TASK
         idle task getting some runtime every now and then. Take Care: With this disabled, this 
         watchdog will trigger if no tasks register themselves within the timeout value.
 
+config TASK_WDT_CHECK_IDLE_TASK_CPU1
+    bool "Task watchdog also watches CPU1 idle task"
+    depends on TASK_WDT_CHECK_IDLE_TASK && !FREERTOS_UNICORE
+    default y
+    help
+        Also check the idle task that runs on CPU1.
+
 #The brownout detector code is disabled (by making it depend on a nonexisting symbol) because the current revision of ESP32
 #silicon has a bug in the brown-out detector, rendering it unusable for resetting the CPU.
 config BROWNOUT_DET

+ 23 - 0
components/esp32/int_wdt.c

@@ -30,6 +30,7 @@ This uses the TIMERG1 WDT.
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdbool.h>
 #include "freertos/FreeRTOS.h"
 #include "freertos/task.h"
 #include <esp_types.h>
@@ -72,12 +73,34 @@ void int_wdt_init() {
 }
 
 
+#if CONFIG_INT_WDT_CHECK_CPU1
+//Not static; the ISR assembly checks this. The assembly reads it with a 32-bit load (l32i),
+//so it must be a full 32-bit word: with a one-byte bool the load would also pick up the
+//three neighbouring bytes and would rely on an alignment a bool does not guarantee.
+uint32_t int_wdt_app_cpu_ticked=0;
+
+/*
+ Tick hook for the interrupt watchdog (TIMERG1 WDT) when CPU1 is watched as well.
+ On any core other than 0 it only records that the core ticked. On core 0 it re-arms and
+ feeds the watchdog, but only if the other core has ticked since the previous feed; the
+ flag is then cleared again. If either core stops ticking, the watchdog is no longer fed.
+*/
+void vApplicationTickHook(void) {
+	if (xPortGetCoreID()!=0) {
+		int_wdt_app_cpu_ticked=1;
+	} else {
+		//Only feed wdt if app cpu also ticked.
+		if (int_wdt_app_cpu_ticked) {
+			TIMERG1.wdt_wprotect=WDT_WRITE_KEY;
+			TIMERG1.wdt_config2=CONFIG_INT_WDT_TIMEOUT_MS*2;		//Set timeout before interrupt
+			TIMERG1.wdt_config3=CONFIG_INT_WDT_TIMEOUT_MS*4;		//Set timeout before reset
+			TIMERG1.wdt_feed=1;
+			TIMERG1.wdt_wprotect=0;
+			int_wdt_app_cpu_ticked=0;
+		}
+	}
+}
+#else
+/*
+ Tick hook for the interrupt watchdog (TIMERG1 WDT) when only CPU0 is watched.
+ Calls on any core other than 0 return immediately; core 0 re-arms and feeds the watchdog.
+*/
 void vApplicationTickHook(void) {
+	if (xPortGetCoreID()!=0) return;
 	TIMERG1.wdt_wprotect=WDT_WRITE_KEY;
 	TIMERG1.wdt_config2=CONFIG_INT_WDT_TIMEOUT_MS*2;		//Set timeout before interrupt
 	TIMERG1.wdt_config3=CONFIG_INT_WDT_TIMEOUT_MS*4;		//Set timeout before reset
 	TIMERG1.wdt_feed=1;
 	TIMERG1.wdt_wprotect=0;
 }
+#endif
 
 #endif

+ 8 - 0
components/esp32/task_wdt.c

@@ -75,6 +75,11 @@ static void IRAM_ATTR task_wdt_isr(void *arg) {
 			printf(" - %s (%s)\n", pcTaskGetTaskName(wdttask->task_handle), cpu);
 		}
 	}
+	ets_printf("Tasks currently running:\n");
+	for (int x=0; x<portNUM_PROCESSORS; x++) {
+		ets_printf("CPU %d: %s\n", x, pcTaskGetTaskName(xTaskGetCurrentTaskHandleForCPU(x)));
+	}
+
 #if CONFIG_TASK_WDT_PANIC
 	ets_printf("Aborting.\n");
 	abort();
@@ -170,6 +175,9 @@ void task_wdt_init() {
 
 #if CONFIG_TASK_WDT_CHECK_IDLE_TASK
 //Idle hook: calls task_wdt_feed() so the idle task feeds the task watchdog.
 //When CONFIG_TASK_WDT_CHECK_IDLE_TASK_CPU1 is not set, only core 0 feeds; calls on any other core return early.
 void vApplicationIdleHook(void) {
+#if !CONFIG_TASK_WDT_CHECK_IDLE_TASK_CPU1
+	if (xPortGetCoreID()!=0) return;	//The idle task on this core is not watched in this configuration.
+#endif
 	task_wdt_feed();
 }
 #endif

+ 3 - 2
components/freertos/include/freertos/panic.h

@@ -7,8 +7,9 @@
 #define PANIC_RSN_DOUBLEEXCEPTION 2
 #define PANIC_RSN_KERNELEXCEPTION 3
 #define PANIC_RSN_COPROCEXCEPTION 4
-#define PANIC_RSN_INTWDT 5
-#define PANIC_RSN_MAX 5
+#define PANIC_RSN_INTWDT_CPU0 5
+#define PANIC_RSN_INTWDT_CPU1 6
+#define PANIC_RSN_MAX 6
 
 
 #ifndef __ASSEMBLER__

+ 11 - 0
components/freertos/include/freertos/task.h

@@ -1933,6 +1933,17 @@ TickType_t uxTaskResetEventItemValue( void ) PRIVILEGED_FUNCTION;
  */
 TaskHandle_t xTaskGetCurrentTaskHandle( void ) PRIVILEGED_FUNCTION;
 
+
+
+/*
+ * Return the handle of the task running on a certain CPU. Because of
+ * the nature of SMP processing, there is no guarantee that this
+ * value will still be valid on return, so it should only be used for
+ * debugging purposes.
+ *
+ * cpuid: index of the CPU to query.
+ * Returns the handle of the task currently running on that CPU, or
+ * NULL when cpuid is portNUM_PROCESSORS or higher.
+ */
+TaskHandle_t xTaskGetCurrentTaskHandleForCPU( BaseType_t cpuid );
+
+
 /*
  * Capture the current time status for future reference.
  */

+ 10 - 9
components/freertos/panic.c

@@ -109,16 +109,16 @@ void commonErrorHandler(XtExcFrame *frame);
 //Stop the CPU this code is NOT running on; xPortGetCoreID() selects which CPU's stall fields get written.
 //For the targeted CPU the C1 field lives in RTC_CNTL_SW_CPU_STALL_REG and the C0 field in RTC_CNTL_OPTIONS0_REG.
 //Each field is cleared through its _M mask first, then C1 is set to 0x21 and C0 to 2.
 //NOTE(review): 0x21 / 2 look like the hardware's magic stall pattern - confirm against the register documentation.
 static void haltOtherCore() {
 	if (xPortGetCoreID()==0) {
 		//Running on core 0: kill app cpu
-		CLEAR_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, RTC_CNTL_SW_STALL_APPCPU_C1<<RTC_CNTL_SW_STALL_APPCPU_C1_S);
-		SET_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, 0x21<<RTC_CNTL_SW_STALL_APPCPU_C1_S);
-		CLEAR_PERI_REG_MASK(RTC_CNTL_SW_CPU_STALL_REG, RTC_CNTL_SW_STALL_APPCPU_C0<<RTC_CNTL_SW_STALL_APPCPU_C0_S);
-		SET_PERI_REG_MASK(RTC_CNTL_SW_CPU_STALL_REG, 2<<RTC_CNTL_SW_STALL_APPCPU_C0_S);
+		CLEAR_PERI_REG_MASK(RTC_CNTL_SW_CPU_STALL_REG, RTC_CNTL_SW_STALL_APPCPU_C1_M);
+		SET_PERI_REG_MASK(RTC_CNTL_SW_CPU_STALL_REG, 0x21<<RTC_CNTL_SW_STALL_APPCPU_C1_S);
+		CLEAR_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, RTC_CNTL_SW_STALL_APPCPU_C0_M);
+		SET_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, 2<<RTC_CNTL_SW_STALL_APPCPU_C0_S);
 	} else {
 		//Running on the other core: kill pro cpu
-		CLEAR_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, RTC_CNTL_SW_STALL_PROCPU_C1<<RTC_CNTL_SW_STALL_PROCPU_C1_S);
-		SET_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, 0x21<<RTC_CNTL_SW_STALL_PROCPU_C1_S);
-		CLEAR_PERI_REG_MASK(RTC_CNTL_SW_CPU_STALL_REG, RTC_CNTL_SW_STALL_PROCPU_C0<<RTC_CNTL_SW_STALL_PROCPU_C0_S);
-		SET_PERI_REG_MASK(RTC_CNTL_SW_CPU_STALL_REG, 2<<RTC_CNTL_SW_STALL_PROCPU_C0_S);
+		CLEAR_PERI_REG_MASK(RTC_CNTL_SW_CPU_STALL_REG, RTC_CNTL_SW_STALL_PROCPU_C1_M);
+		SET_PERI_REG_MASK(RTC_CNTL_SW_CPU_STALL_REG, 0x21<<RTC_CNTL_SW_STALL_PROCPU_C1_S);
+		CLEAR_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, RTC_CNTL_SW_STALL_PROCPU_C0_M);
+		SET_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, 2<<RTC_CNTL_SW_STALL_PROCPU_C0_S);
 	}
 }
 
@@ -143,7 +143,8 @@ void panicHandler(XtExcFrame *frame) {
 			"Double exception",
 			"Unhandled kernel exception",
 			"Coprocessor exception",
-			"Interrupt wdt timeout"
+			"Interrupt wdt timeout on CPU0",
+			"Interrupt wdt timeout on CPU1",
 		};
 	const char *reason=reasons[0];
 	//The panic reason is stored in the EXCCAUSE register.

+ 30 - 0
components/freertos/tasks.c

@@ -1974,6 +1974,23 @@ BaseType_t xSwitchRequired = pdFALSE;
 		  We can't really calculate what we need, that's done on core 0... just assume we need a switch.
 		  ToDo: Make this more intelligent? -- JD
 		*/
+		//We do need the tick hook to satisfy the int watchdog.
+		#if ( configUSE_TICK_HOOK == 1 )
+		{
+			/* Guard against the tick hook being called when the pended tick
+			count is being unwound (when the scheduler is being unlocked). */
+			if( uxPendedTicks == ( UBaseType_t ) 0U )
+			{
+				vApplicationTickHook();
+			}
+			else
+			{
+				mtCOVERAGE_TEST_MARKER();
+			}
+		}
+		#endif /* configUSE_TICK_HOOK */
+
+
 		return pdTRUE;
 	}
 
@@ -3565,6 +3582,19 @@ TCB_t *pxTCB;
 		return xReturn;
 	}
 
+	/*
+	 * Return the handle of the task currently running on CPU 'cpuid', or NULL
+	 * when cpuid is not a valid CPU index (negative, or portNUM_PROCESSORS and up).
+	 * The value is a snapshot: the other CPU may switch tasks at any moment, so
+	 * it may already be stale when the caller looks at it.
+	 */
+	TaskHandle_t xTaskGetCurrentTaskHandleForCPU( BaseType_t cpuid )
+	{
+	TaskHandle_t xReturn=NULL;
+
+		//Xtensa-specific: the pxCurrentTCB pointer is atomic so we shouldn't need a lock.
+		//BaseType_t is signed, so a negative index has to be rejected too; otherwise
+		//it would read in front of the pxCurrentTCB array.
+		if (cpuid >= 0 && cpuid < portNUM_PROCESSORS) {
+			xReturn = pxCurrentTCB[ cpuid ];
+		}
+
+		return xReturn;
+	}
+
+
 #endif /* ( ( INCLUDE_xTaskGetCurrentTaskHandle == 1 ) || ( configUSE_MUTEXES == 1 ) ) */
 /*-----------------------------------------------------------*/
 

+ 15 - 2
components/freertos/xtensa_vectors.S

@@ -92,7 +92,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "xtensa_rtos.h"
 #include "panic.h"
-
+#include "sdkconfig.h"
 /*
   Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used.
   Please change this when the tcb structure is changed
@@ -1616,12 +1616,25 @@ _xt_highint4:
     ADD HIGH PRIORITY LEVEL 4 INTERRUPT HANDLER CODE HERE.
     */
 
+
+
     /* On the ESP32, this level is used for the INT_WDT handler. If that triggers, the program is stuck with interrupts
        off and the CPU should panic. */
 	rsr     a0, EXCSAVE_4
 	wsr     a0, EXCSAVE_1 /* panic handler reads this register */
-	movi    a0,PANIC_RSN_INTWDT
+	/* Set EXCCAUSE to reflect cause of the wdt int trigger */
+	movi    a0,PANIC_RSN_INTWDT_CPU0
 	wsr     a0,EXCCAUSE
+#if CONFIG_INT_WDT_CHECK_CPU1
+	/* Check if the cause is the app cpu failing to tick.*/
+	movi    a0, int_wdt_app_cpu_ticked
+	l32i    a0, a0, 0
+	bnez    a0, 1f
+	/* It is. Modify cause. */
+	movi    a0,PANIC_RSN_INTWDT_CPU1
+	wsr     a0,EXCCAUSE
+1:
+#endif
     call0 _xt_panic