Re-enable bootloader interrupts after a hard fault [Teensy 4.1]

I was wondering similar when I mentioned earlier wonder if we can keep it elsewhere...
Again wonder in two different ways:

Printed report after reboot - As example show we could detect !Serial and then see if SD available, or LFS...

Or something like EEPROM sort of as you mentioned, but was also thinking alternative, like maybe like reserve one (or more) Flash sectors, Just before EEPROM, that You write out the crash data. Probably walk your way down sector, writing each new crash so you don't have to erase... But probably still too much stuff to have to do in order to write a sector?

There is one thing that I am not crazy about with storing the crash report on the T4.1 directly for production use. If I have a T4.1 crashing for unknown reasons with the sketch I have loaded and then store the crash report on FLASH or EEPROM I then have to load a second program to read the crash report. If I put it on an SD Card I think that would be an easier approach. But that's just me.
 
There is one thing that I am not crazy about with storing the crash report on the T4.1 directly for production use. If I have a T4.1 crashing for unknown reasons with the sketch I have loaded and then store the crash report on FLASH or EEPROM I then have to load a second program to read the crash report. If I put it on an SD Card I think that would be an easier approach. But that's just me.
Yep - it may depend on how one uses it afterwards... That is again the reboot, could detect it and print it out like the current library stuff, and optionally mark it as processed. Alternatively the product code, could build in support, like a diagnostic command, or if SerialUSB1 is connected...

For now, I think the ability to store to FS on reboot is good enough for now. You can then decide if the logging should be last one wins (replace file) or append...
 

Tried Franks bus fault test case with the updates to core and its working:
Code:
D:\Users\Merli\Documents\Arduino\CrashRepot_example\CrashRepot_example.ino Jun 24 2021 09:10:21
 millis() now 389
CrashReport:
  Fault occurred at: 09:10:26
  Temperature and System Clock at time of fault: 
    Temp = 44.549549 deg C,  Sys Clock = 600 Mhz
  length: 11
  IPSR: 5
  CFSR: 400
	(IMPRECISERR) Data bus error but address not related to instruction
  MMFAR: 0
  BFAR: 0
  return address: 6524
  XPSR: 210E0000
  crc: 8B76B150
Reboot was caused by 8 second auto-reboot after fault or bad interrupt detected

	RECOVERED from Crash
Haven't tried the new lib yet though
 
FYI - I synced up cores... And extended the test '5' to look at a few others..
Code:
    else if ( cc == '5' ) {
      Serial.printf("SCB_SHPRx: %x %x %x SHCSR:%x\n", SCB_SHPR1, SCB_SHPR2, SCB_SHPR3, SCB_SHCSR);

      // Lets try printing int LPSPI3 registers.
      Serial.println("Try LPSPI3"); Serial.flush();
      Serial.printf("    VERID: %x\n", LPSPI3_VERID); Serial.flush();
      Serial.printf("    PARAM: %x\n", LPSPI3_PARAM); Serial.flush();
      Serial.printf("    CR: %x\n", LPSPI3_CR); Serial.flush();
      Serial.printf("    SR: %x\n", LPSPI3_SR); Serial.flush();
      Serial.printf("    IER: %x\n", LPSPI3_IER); Serial.flush();
      Serial.printf("    DER: %x\n", LPSPI3_DER); Serial.flush();
      Serial.printf("    CFGR0: %x\n", LPSPI3_CFGR0); Serial.flush();
      Serial.printf("    CFGR1: %x\n", LPSPI3_CFGR1); Serial.flush();

      Serial.println("Try LPUART5"); Serial.flush();
      Serial.printf("    VERID:%x\n", LPUART5_VERID); Serial.flush();
      Serial.printf("    PARAM:%x\n", LPUART5_PARAM); Serial.flush();
      Serial.printf("    GLOBAL:%x\n", LPUART5_GLOBAL); Serial.flush();
      Serial.printf("    PINCFG:%x\n", LPUART5_PINCFG); Serial.flush();
      Serial.printf("    BAUD:%x\n", LPUART5_BAUD); Serial.flush();
      Serial.printf("    STAT:%x\n", LPUART5_STAT); Serial.flush();
      Serial.printf("    CTRL:%x\n", LPUART5_CTRL); Serial.flush();
      Serial.printf("    DATA:%x\n", LPUART5_DATA); Serial.flush();
      Serial.printf("    MATCH:%x\n", LPUART5_MATCH); Serial.flush();
      Serial.printf("    MODIR:%x\n", LPUART5_MODIR); Serial.flush();
      Serial.printf("    FIFO:%x\n", LPUART5_FIFO); Serial.flush();
      Serial.printf("    WATER:%x\n", LPUART5_WATER); Serial.flush();
      
      Serial.println("Try FlexIO"); Serial.flush();
      // CCM_CCGR5 |= CCM_CCGR5_FLEXIO1(CCM_CCGR_ON);
      IMXRT_FLEXIO_t *p = &IMXRT_FLEXIO1_S;
      Serial.printf(" % x\n", (uint32_t)p); Serial.flush();
      Serial.printf("VERID: % x PARAM: % x CTRL: % x PIN: % x\n", p->VERID, p->PARAM, p->CTRL, p->PIN); Serial.flush();
      Serial.printf("SHIFTSTAT: % x SHIFTERR = % x TIMSTAT = % x\n", p->SHIFTSTAT, p->SHIFTERR, p->TIMSTAT); Serial.flush();
      Serial.printf("SHIFTSIEN: % x SHIFTEIEN = % x TIMIEN = % x\n", p->SHIFTSIEN, p->SHIFTEIEN, p->TIMIEN); Serial.flush();
      Serial.printf("SHIFTSDEN: % x SHIFTSTATE = % x\n", p->SHIFTSDEN, p->SHIFTSTATE); Serial.flush();
      Serial.printf("SHIFTCTL: % x % x % x % x\n", p->SHIFTCTL[0], p->SHIFTCTL[1], p->SHIFTCTL[2], p->SHIFTCTL[3]); Serial.flush();
      Serial.printf("SHIFTCFG: % x % x % x % x\n", p->SHIFTCFG[0], p->SHIFTCFG[1], p->SHIFTCFG[2], p->SHIFTCFG[3]); Serial.flush();
      Serial.printf("TIMCTL: % x % x % x % x\n", p->TIMCTL[0], p->TIMCTL[1], p->TIMCTL[2], p->TIMCTL[3]); Serial.flush();
      Serial.printf("TIMCFG: % x % x % x % x\n", p->TIMCFG[0], p->TIMCFG[1], p->TIMCFG[2], p->TIMCFG[3]); Serial.flush();
      Serial.printf("TIMCMP: % x % x % x % x\n", p->TIMCMP[0], p->TIMCMP[1], p->TIMCMP[2], p->TIMCMP[3]); Serial.flush();
      Serial.println("made it through");
    }
Code:
C:\Users\kurte\Documents\Arduino\Teensy Tests\Test_CrashReport\Test_CrashReport.ino Jun 24 2021 06:25:59
 millis() now 443

It's your fault!

SCB_SHPRx: 0 0 20200000 SHCSR:70000
Try LPSPI3
    VERID: 1010004
    PARAM: 404
    CR: 0
    SR: 1
    IER: 0
    DER: 0
    CFGR0: 0
    CFGR1: 0
Try LPUART5
    VERID:4010003
    PARAM:202
    GLOBAL:0
    PINCFG:0
    BAUD:f000004
    STAT:c00000
    CTRL:0
    DATA:1000
    MATCH:0
    MODIR:0
    FIFO:c00011
    WATER:0
Try FlexIO
 401ac000
So walked through registers of LPSPI3 and LPUART5... ... Maybe should check to see if something already enabled the CCM bit for these??
 
Unaligned accesses are allowed on our cortex-m7. There are only some registers where it is forbidden. I'll try to find one to see if, and in which cases we get a fault.

What I meant was half-word or byte accesses. But so far I could not find an example that would crash.
I'd say we don't need it.
 
@Kurt: Have you tried write-accesses? There might be a difference.

Re: interrupt priority 0: Changing it because of USB would mean that it would need a lower priority than USB?
Don't know if that is possible (don't think so) and if it would make sense..
 
@Paul: Is there *any* chance to make the red LED blink? With a bootloader update? maybe by using "bkpt #42"? You said you're going to work on it..

It would allow to indicate crashes when no printing is possible (i.e. a crash before setup() or without USB or..)
 
@Kurt: Have you tried write-accesses? There might be a difference.

Re: interrupt priority 0: Changing it because of USB would mean that it would need a lower priority than USB?
Don't know if that is possible (don't think so) and if it would make sense..

I have tried some write access will try some again.

Thanks
 
Didn't test this - but this edit should stop FAST REBOOT if the fault is the same:
Code:
void unused_interrupt_vector(void)
{
	uint32_t i, ipsr, crc, count, [B]crcOld[/B];
	const uint32_t *stack;
	struct arm_fault_info_struct *info;
	const uint32_t *p, *end;

	// disallow any nested interrupts
	__disable_irq();
	// store crash report info
	asm volatile("mrs %0, ipsr\n" : "=r" (ipsr) :: "memory");
	info = (struct arm_fault_info_struct *)0x2027FF80;
	[B]crcOld = info->crc;[/B]
//...
	while (1) {
		if (PIT_TFLG0) {
			//GPIO7_DR_TOGGLE = (1 << 3); // blink LED
			PIT_TFLG0 = 1;
[B]			++count
			if ((crcOld != info->crc) && count >= 8) break;  // reboot after .8 seconds
			else if (count >= 80) break;  // reboot after 8 seconds
[/B]		}

//...

P# 127 updated output scheme for self doc and better response

@KurtE - this p#155 isn't the latest more user friendly copy of "it's your fault"
Code:
C:\Users\kurte\Documents\Arduino\Teensy Tests\Test_CrashReport\Test_CrashReport.ino Jun 24 2021 06:25:59
 millis() now 443

[B]It's your fault![/B]

Anyone else note that examples/FPU_Turned_Off/FPU_Turned_Off.ino still works or not? Noted about p#127 it was no longer printing on restart.

Gotta - run ...
 
What I meant was half-word or byte accesses. But so far I could not find an example that would crash.
I'd say we don't need it.

I did find an example that they say on an ARM should throw a fault but I can't modify the example properly to get it to compile in the IDE? Here is the link if anyone is interested. https://www.alfonsobeato.net/arm/how-to-access-safely-unaligned-data/

EDIT: Don't bother - I got the sketch working but it doesn't throw an error - it runs fine :) So looks like don't need unaligned fault.
 
Last edited:
Didn't test this - but this edit should stop FAST REBOOT if the fault is the same:
....
Anyone else note that examples/FPU_Turned_Off/FPU_Turned_Off.ino still works or not? Noted about p#127 it was no longer printing on restart.

Gotta - run ...
Had to put the delay(5000) back in to see the fault. All I get is:
Code:
No Crash Reported or Crash was Cleared
Reboot was caused by software write to SCB_AIRCR or CPU lockup
and reboots constantly!
 
Had to put the delay(5000) back in to see the fault. All I get is:
Code:
No Crash Reported or Crash was Cleared
Reboot was caused by software write to SCB_AIRCR or CPU lockup
and reboots constantly!

That's odd - never saw such a thing before :: reboots constantly.

The FPU used to just restart and show the fault ... something changed? - stopped that? That's why I did 15s Restore - no help.

As far as FAULTS - check my sketch, it had a link for 5+ faults to cause - I got some working : interrupt.memfault.com/blog/cortex-m-fault-debug

One of those was to trigger a border crossing write error - Tried making a DWORD buff array and setting offset a BYTE [ to make g_unaligned_buffer ] and didn't get it resolved before I moved on:
Code:
Recovering from a UsageFault without a SYSRESET
In this example we’ll just step through the code we developed above and confirm we don’t reset when a UsageFault occurs.

// Stores a 64-bit constant through a pointer obtained from an externally
// declared buffer. Despite the "_read" suffix, the body performs a write.
// NOTE(review): presumably g_unaligned_buffer is deliberately misaligned so the
// 64-bit store raises a fault — confirm against its definition, not shown here.
void unaligned_double_word_read(void) {
  // Declaration only; the definition of this buffer is not visible here.
  extern void *g_unaligned_buffer;
  // Implicit void* -> uint64_t* conversion: accepted by C, but a C++ compiler
  // requires an explicit cast.
  uint64_t *buf = g_unaligned_buffer;
  // The 64-bit store under test.
  *buf = 0x1122334455667788;
}

(I'm out trimming back bluff growth ... )
 
FYI - I tried some writes as well still no faults in some of the sub-systems...
<Yes still using older version of sketch...> ;)

Code:
   else if ( cc == '5' ) {
      Serial.printf("SCB_SHPRx: %x %x %x SHCSR:%x\n", SCB_SHPR1, SCB_SHPR2, SCB_SHPR3, SCB_SHCSR);

      // Lets try printing int LPSPI3 registers.
      Serial.println("Try LPSPI3"); Serial.flush();
      Serial.printf("    VERID: %x\n", LPSPI3_VERID); Serial.flush();
      Serial.printf("    PARAM: %x\n", LPSPI3_PARAM); Serial.flush();
      Serial.printf("    CR: %x\n", LPSPI3_CR); Serial.flush();
      Serial.printf("    SR: %x\n", LPSPI3_SR); Serial.flush();
      Serial.printf("    IER: %x\n", LPSPI3_IER); Serial.flush();
      Serial.printf("    DER: %x\n", LPSPI3_DER); Serial.flush();
      Serial.printf("    CFGR0: %x\n", LPSPI3_CFGR0); Serial.flush();
      Serial.printf("    CFGR1: %x\n", LPSPI3_CFGR1); Serial.flush();
      Serial.println("Try writes to LPSPI3"); Serial.flush();
      LPSPI3_CR = LPSPI_CR_RST;
      LPSPI3_FCR = LPSPI_FCR_TXWATER(15);
      LPSPI3_CR = 0;
      LPSPI3_CFGR1 = LPSPI_CFGR1_MASTER | LPSPI_CFGR1_SAMPLE;

      Serial.println("Try LPUART5"); Serial.flush();
      Serial.printf("CCM_CCGR3: %x Bits: %x\n", CCM_CCGR3, CCM_CCGR3_LPUART5(CCM_CCGR_ON));

      Serial.printf("    VERID:%x\n", LPUART5_VERID); Serial.flush();
      Serial.printf("    PARAM:%x\n", LPUART5_PARAM); Serial.flush();
      Serial.printf("    GLOBAL:%x\n", LPUART5_GLOBAL); Serial.flush();
      Serial.printf("    PINCFG:%x\n", LPUART5_PINCFG); Serial.flush();
      Serial.printf("    BAUD:%x\n", LPUART5_BAUD); Serial.flush();
      Serial.printf("    STAT:%x\n", LPUART5_STAT); Serial.flush();
      Serial.printf("    CTRL:%x\n", LPUART5_CTRL); Serial.flush();
      Serial.printf("    DATA:%x\n", LPUART5_DATA); Serial.flush();
      Serial.printf("    MATCH:%x\n", LPUART5_MATCH); Serial.flush();
      Serial.printf("    MODIR:%x\n", LPUART5_MODIR); Serial.flush();
      Serial.printf("    FIFO:%x\n", LPUART5_FIFO); Serial.flush();
      Serial.printf("    WATER:%x\n", LPUART5_WATER); Serial.flush();
      Serial.println("Try writes to LPUART5"); Serial.flush();
      LPUART5_BAUD = LPUART_BAUD_OSR(25) | LPUART_BAUD_SBR(8); // ~115200 baud
      LPUART5_CTRL = LPUART_CTRL_TE;
      LPUART5_DATA = 0xff;
      delay(25);
      Serial.println("Try CSI"); Serial.flush();
      Serial.printf("    CSI_CSICR1:%x\n", CSI_CSICR1); Serial.flush();
      Serial.printf("    CSI_CSICR2:%x\n", CSI_CSICR2); Serial.flush();
      Serial.printf("    CSI_CSICR3:%x\n", CSI_CSICR3); Serial.flush();

      Serial.println("Try FlexIO"); Serial.flush();

      // CCM_CCGR5 |= CCM_CCGR5_FLEXIO1(CCM_CCGR_ON);
      IMXRT_FLEXIO_t *p = &IMXRT_FLEXIO1_S;
      Serial.printf(" % x\n", (uint32_t)p); Serial.flush();
      Serial.printf("VERID: % x PARAM: % x CTRL: % x PIN: % x\n", p->VERID, p->PARAM, p->CTRL, p->PIN); Serial.flush();
      Serial.printf("SHIFTSTAT: % x SHIFTERR = % x TIMSTAT = % x\n", p->SHIFTSTAT, p->SHIFTERR, p->TIMSTAT); Serial.flush();
      Serial.printf("SHIFTSIEN: % x SHIFTEIEN = % x TIMIEN = % x\n", p->SHIFTSIEN, p->SHIFTEIEN, p->TIMIEN); Serial.flush();
      Serial.printf("SHIFTSDEN: % x SHIFTSTATE = % x\n", p->SHIFTSDEN, p->SHIFTSTATE); Serial.flush();
      Serial.printf("SHIFTCTL: % x % x % x % x\n", p->SHIFTCTL[0], p->SHIFTCTL[1], p->SHIFTCTL[2], p->SHIFTCTL[3]); Serial.flush();
      Serial.printf("SHIFTCFG: % x % x % x % x\n", p->SHIFTCFG[0], p->SHIFTCFG[1], p->SHIFTCFG[2], p->SHIFTCFG[3]); Serial.flush();
      Serial.printf("TIMCTL: % x % x % x % x\n", p->TIMCTL[0], p->TIMCTL[1], p->TIMCTL[2], p->TIMCTL[3]); Serial.flush();
      Serial.printf("TIMCFG: % x % x % x % x\n", p->TIMCFG[0], p->TIMCFG[1], p->TIMCFG[2], p->TIMCFG[3]); Serial.flush();
      Serial.printf("TIMCMP: % x % x % x % x\n", p->TIMCMP[0], p->TIMCMP[1], p->TIMCMP[2], p->TIMCMP[3]); Serial.flush();
      Serial.println("made it through");
    }
Still makes it to the FlexIO hang
Code:
C:\Users\kurte\Documents\Arduino\Teensy Tests\Test_CrashReport\Test_CrashReport.ino Jun 24 2021 09:30:58
 millis() now 435

It's your fault!

SCB_SHPRx: 0 0 20200000 SHCSR:70000
Try LPSPI3
    VERID: 1010004
    PARAM: 404
    CR: 0
    SR: 1
    IER: 0
    DER: 0
    CFGR0: 0
    CFGR1: 0
Try writes to LPSPI3
Try LPUART5
CCM_CCGR3: f00ff300 Bits: c
    VERID:4010003
    PARAM:202
    GLOBAL:0
    PINCFG:0
    BAUD:f000004
    STAT:c00000
    CTRL:0
    DATA:1000
    MATCH:0
    MODIR:0
    FIFO:c00011
    WATER:0
Try writes to LPUART5
Try CSI
    CSI_CSICR1:40000800
    CSI_CSICR2:0
    CSI_CSICR3:0
Try FlexIO
 401ac000
Will try more later.
 
FYI - I tried some writes as well still no faults in some of the sub-systems...
<Yes still using older version of sketch...> ;)
...
Will try more later.

With the single output showMyFault() it was easy to add the .flush - though that shouldn't really be needed, or be there, if the PJRC USB prodding in unused_interrupt_vector() works.
 
What do you think of this direction as a change for startup.c.
No longer wait 8 secs before restart so Teensy back on USB sooner and ready

Nope, definitely not going to make rebooting after only 0.8 second the default.

Automatically rebooting after *any* amount of time is a risky trade-off. 8 seconds is absolutely as short as I'm willing to make the default. I'm not even 100% sure 8 seconds is a good idea. It very well may become 10, 12 or 15 seconds.

If a user program faults, it stops. That is the expected model for faults. Until now, the behavior has always been to stop forever. Well, unless a watchdog timer is used. And for programs which need to recover from errors with predictable timing, use of watchdog timers is still going to be the recommended approach.


Then a new func call before setup - except for needing a "C" interface to get bool CrashReport:

For the upcoming 1.54 release, no more changes in the startup process. We're going to wrap up 1.54 with CrashReport having a known limitation of not being able to report on crashes in C++ constructors.

After 1.54, my plan is to return to the 3 month or new Arduino release cadence. We can consider those sorts of things for future releases.

Right now, stable 1.54 release is looming. Many types of changes I would consider "high risk" are off the table in this moment. Almost any change in startup behavior before setup() is high risk. We'll talk of those sorts of changes after 1.54 is finalized.
 
Please send pull requests to add simple 1-case fault tests.

https://github.com/PaulStoffregen/MyFault/tree/main/examples

Don't use Serial.flush() or delay() other things to make them "work" (or leave them commented out). If CrashReport doesn't handle any case well, I want to see the ugly result!

Not sure if you want to see this...
Code:
// Fault test sketch: setup() writes far past the end of a global array.
char bigArray[256 * 1024]; //256 kb

void setup() {
  Serial.begin(9600);  
  // Print the CrashReport object — presumably describes a fault from the
  // previous run, if one was recorded.
  Serial.print(CrashReport);  

  // Deliberate out-of-bounds write: bigArray holds 256 * 1024 bytes, but
  // 500 * 1024 bytes are zeroed starting at its base.
  memset(bigArray, 0, 500 * 1024); // violate array bounds - fill 500kb

}

// Intentionally empty: all the work happens in setup().
void loop() {}

Ugly enough? - it crashes and reboots blazing fast without crash report and without 8 secs wait :)
It almost crashes my PC.
 
Quick note: on my updated...
Code:
    else if ( cc == '5' ) {
      showMyFault("Try FlexIO : Locks up");
       //CCM_CCGR5 |= CCM_CCGR5_FLEXIO1(CCM_CCGR_ON);
      Serial.printf("VERID:%x PARAM:%x CTRL:%x PIN: %x\n", FLEXIO1_VERID, FLEXIO1_PARAM, FLEXIO1_CTRL, FLEXIO1_PIN);
      Serial.println("made it through");
      Serial.flush();
    }
    else if ( cc == '6' ) {
      showMyFault("Fault using device not enabled");
      LPUART3_CTRL = 0; // did not enable the clock for this    }
    }
Using your updated stuff.

Note: Test 6 does not crash... It will crash if you have terminal monitor echoing any CR or LF character, as it will process those characters and fall through to the:
Code:
    else { // default fault on Serial with no other fault ... from 'Enter'
      showMyFault("NULL pointer write");
      uint32_t *y = 0; y[0] = 5;
    }
And crash with a NULL ptr...
 
@Paul:
BigArray example: It would work to create/initialize a "new" stack in the handler.
But this does not work with your 8 secs delay before reboot, you have to disable interrupts globally.
Because of this and other problems (crash before setup) I did it the other way and reset ASAP and waited 10 secs after printing the crash-report.. this did work with USB only, I know.

But during development - where 99% crashes happen, this is OK, i thought..
 
CRLF: That's why simple tests are better..

Don't know why accessing LPUART3 does not crash. Is it enabled?

Indeed the LPUart used to work - maybe last test on T_3.6 ...

No doubt the simple tests are nice single purpose ...

That test is a SIMPLE test bed. The CR behavior can be turned off - just put a if('?') char for it - but that was the initial test by design.

Having the ability to choose when it faults was great for repeat testing - should have had an 'r'estart for those not using TyComm to force a reset.
 
@Paul
I posted the panic example that changes the temp range. When I posted it was working as planned but went to make a change and now it crashes with panic temp as soon as you turn it on and looks like the arm clock isn't set yet either = something strange going on.
 
Back
Top