I am trying to sample a GPIO port to measure the phase difference between two signals with the highest possible resolution. All the hardware blocks that I would normally try to do this with are already tied up for other things or not routed out to pins on the Teensy 4.0.
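The processor reference manual states on page 949 that GPIO1-5 are standard-speed GPIOs that run off the IPG_CLK_ROOT, while GPIO6-9 are high-speed GPIOs that run at the AHB_CLK_ROOT frequency.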
According to this discussion here:
https://forum.pjrc.com/threads/58432-Nanosecond-Resolution-Interrupts-on-Teensy-4-0
the Teensy 4.0 defaults to fast mapping.
This is interesting because it indicates that these pins could perhaps be sampled at close to the full 600+ MHz core frequency.
So I wrote some code to test this. The result is that, no matter what I try, the maximum sample rate I can get is somewhere between 70 and 80 MHz.
Here is some assembly code that loops 100 measurements into an array with the tightest loop I can think of:
Here is another approach that avoids the loop altogether:
Even avoiding that kind of array memory access delay, by first filling all available registers, yields the same sampling speed:
Am I doing something wrong? Any help at all would be greatly appreciated.
Jan
The processor reference manual states on page 949:
Code:
• GPIO1-5 are standard-speed GPIOs that run off the IPG_CLK_ROOT, while GPIO6-9 are high-speed GPIOs that run at the AHB_CLK_ROOT frequency.
According to this discussion here:
https://forum.pjrc.com/threads/58432-Nanosecond-Resolution-Interrupts-on-Teensy-4-0
the Teensy 4.0 defaults to fast mapping.
This is interesting because it indicates that these pins could perhaps be sampled at close to the full 600+ MHz core frequency.
So I wrote some code to test this. The result is that, no matter what I try, the maximum sample rate I can get is somewhere between 70 and 80 MHz.
Here is some assembly code that loops 100 measurements into an array with the tightest loop I can think of:
Code:
uint32_t gpioData[100];
// Pre-fill with a sentinel (5) so any entry the asm loop failed to write
// is easy to spot in the printed output.
for (int j = 0; j < 100; j++)
{
    gpioData[j] = 5;
}
// Tight capture loop: read GPIO6_PSR and store it into gpioData, 100 times.
// Notes on the constraints:
//  - "memory" tells the compiler the asm writes memory (gpioData) behind its
//    back; without it the compiler may assume the array still holds the
//    sentinel values when it is printed below.
//  - "cc" is listed because cmp modifies the condition flags.
//  - The numeric local label (1: / 1b) avoids a duplicate-symbol error if the
//    compiler ever emits this asm statement more than once.
asm volatile("ldr r0, =0x42004008 \n\t" // r0 = address of GPIO6_PSR
             "mov r1, %0 \n\t"          // r1 = write pointer, starts at gpioData[0]
             "mov r2, r1 \n\t"          // r2 = copy of the array base ...
             "add r2, #396 \n\t"        // ... advanced to the last element (99 * 4 bytes)
             "1: \n\t"                  // loop head
             "ldr r3, [r0] \n\t"        // sample GPIO6_PSR into r3
             "str r3, [r1], #4 \n\t"    // store the sample, then advance the pointer by 4 bytes
             "cmp r1, r2 \n\t"          // compare write pointer against the last-element address
             "bls 1b \n\t"              // unsigned <=: addresses must not be compared as signed
             :                          // output operand list (none)
             : "r"(gpioData)            // input operand list
             : "r0", "r1", "r2", "r3", "cc", "memory");
// now write the 100 values
for (int j = 0; j < 100; j++)
{
    Serial.println(gpioData[j]);
}
Here is another approach that avoids the loop altogether:
Code:
uint32_t gpioData[100];
// first read 100 values as fast as possible
for (int i = 0; i < 100; i++)
{
gpioData[i] = 5;
}
asm volatile("ldr r0 ,=0x42004008 \n\t" // load address of GPIO6_PSR into r0
"mov r1 ,%0 \n\t" // copy address of array into register, index = 0
"ldr r2 ,[r0] \n\t" // load value of GPIO6_PSR into r2
"str r2 ,[r1], #4 \n\t" // store value into gpioDataArray and then add 4 bytes to the index
"ldr r2 ,[r0] \n\t" // load value of GPIO6_PSR into r1
"str r2 ,[r1], #4 \n\t" // store value into gpioDataArray and then add 4 bytes to the index
"ldr r2 ,[r0] \n\t" // load value of GPIO6_PSR into r2
"str r2 ,[r1], #4 \n\t" // store value into gpioDataArray and then add 4 bytes to the index
"ldr r2 ,[r0] \n\t" // load value of GPIO6_PSR into r1
"str r2 ,[r1], #4 \n\t" // store value into gpioDataArray and then add 4 bytes to the index
"ldr r2 ,[r0] \n\t" // load value of GPIO6_PSR into r2
"str r2 ,[r1], #4 \n\t" // store value into gpioDataArray and then add 4 bytes to the index
"ldr r2 ,[r0] \n\t" // load value of GPIO6_PSR into r1
"str r2 ,[r1], #4 \n\t" // store value into gpioDataArray and then add 4 bytes to the index
[...]
// many more to fill the array
[...][
"ldr r2 ,[r0] \n\t" // load value of GPIO6_PSR into r2
"str r2 ,[r1], #4 \n\t" // store value into gpioDataArray and then add 4 bytes to the index
: // output operand list
: "r" (gpioData) // input operand list
: "r0", "r1", "r2"
);
// now write the 100 values
for (int j = 0; j < 100; j++)
{
Serial.println(gpioData[j]);
}
Even avoiding that kind of array memory access delay, by first filling all available registers, yields the same sampling speed:
Code:
uint32_t gpioData[11];
// Take 11 back-to-back samples of GPIO6_PSR into registers r2..r12 with no
// memory stores in between, and only afterwards write them to gpioData.
// "memory" tells the compiler the asm writes gpioData; without it the
// compiler is free to assume the array contents are unchanged by the asm.
// NOTE(review): r7 is commonly used as the Thumb frame pointer; GCC may
// reject it in the clobber list when a frame pointer is in use - confirm
// against the build flags.
asm volatile("ldr r0, =0x42004008 \n\t" // r0 = address of GPIO6_PSR
             "mov r1, %0 \n\t"          // r1 = write pointer, starts at gpioData[0]
             "ldr r2, [r0] \n\t"        // sample  1 -> r2
             "ldr r3, [r0] \n\t"        // sample  2 -> r3
             "ldr r4, [r0] \n\t"        // sample  3 -> r4
             "ldr r5, [r0] \n\t"        // sample  4 -> r5
             "ldr r6, [r0] \n\t"        // sample  5 -> r6
             "ldr r7, [r0] \n\t"        // sample  6 -> r7
             "ldr r8, [r0] \n\t"        // sample  7 -> r8
             "ldr r9, [r0] \n\t"        // sample  8 -> r9
             "ldr r10, [r0] \n\t"       // sample  9 -> r10
             "ldr r11, [r0] \n\t"       // sample 10 -> r11
             "ldr r12, [r0] \n\t"       // sample 11 -> r12
             // Sampling is finished; store r2..r12 to gpioData[0..10] in
             // ascending order and advance r1 past them (same result as
             // eleven individual post-indexed stores).
             "stmia r1!, {r2-r12} \n\t"
             :                          // output operand list (none)
             : "r"(gpioData)            // input operand list
             : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "memory");
// now write the 11 values
for (int j = 0; j < 11; j++)
{
    Serial.println(gpioData[j]);
}
Am I doing something wrong? Any help at all would be greatly appreciated.
Jan
Last edited: