I am currently measuring the execution times of simple delay loops. I toggle pin 13 and measure the execution times using an oscilloscope. Clock frequency is 600 MHz. The function nopLoopn() contains n NOP instructions (n=0..7). The complete program is:
The compiler inserts the NOP instructions as given by the source code. The generated code for nopLoop5() for example looks like this:
The interesting part is the loop execution times:
Interestingly, the NOPs seem not to be executed for n=1..5; for n=6 the NOPs are sometimes executed; and for n=7 there is one NOP that always gets executed.
Is there a simple explanation for this behaviour?
Code:
// Pin number handed to pinMode() in setup(). The toggle loops below do not use
// this variable; they address pin 13 directly through the CORE_PIN13_* macros.
int led = 13;
// One-time start-up: configure the LED pin as an output, print a banner, then
// hand control to the toggle loop that is to be measured.
void setup() {
  // Pause lengths in milliseconds, named so the two waits are easy to tell apart.
  const unsigned long startupPauseMs = 2000;
  const unsigned long bannerPauseMs = 200;

  pinMode(led, OUTPUT);
  delay(startupPauseMs);
  Serial.println("teensy40nopLoops1...");
  delay(bannerPauseMs);

  // nopLoop5() spins in while(1) and does not return, so nothing placed after
  // this call would ever run. Swap in another nopLoopN() to measure a
  // different NOP count.
  nopLoop5();
}
// Baseline toggle loop: no NOP between the port-set and port-clear writes.
// Prints its name once, then toggles pin 13 in an endless loop (never returns).
void nopLoop0() { // measured: 150 MHz toggle rate = 4 cycles per iteration at the 600 MHz clock
Serial.println("nopLoop0()...") ;
while(1){
// Write pin 13's bitmask to its port-set register, then to its port-clear register.
CORE_PIN13_PORTSET = CORE_PIN13_BITMASK;
CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK;
}
}
// Toggle loop with 1 NOP between the port-set and port-clear writes.
// Prints its name once, then toggles pin 13 in an endless loop (never returns).
void nopLoop1() { // measured: 150 MHz toggle rate = 4 cycles per iteration (same as with 0 NOPs)
Serial.println("nopLoop1()...") ;
while(1){
CORE_PIN13_PORTSET = CORE_PIN13_BITMASK;
// asm volatile so the compiler emits the NOP instead of discarding it.
asm volatile("nop");
CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK;
}
}
// Toggle loop with 2 NOPs between the port-set and port-clear writes.
// Prints its name once, then toggles pin 13 in an endless loop (never returns).
void nopLoop2() { // measured: 150 MHz toggle rate = 4 cycles per iteration (same as with 0 NOPs)
Serial.println("nopLoop2()...") ;
while(1){
CORE_PIN13_PORTSET = CORE_PIN13_BITMASK;
// asm volatile so the compiler emits every NOP instead of discarding them.
asm volatile("nop");
asm volatile("nop");
CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK;
}
}
// Toggle loop with 3 NOPs between the port-set and port-clear writes.
// Prints its name once, then toggles pin 13 in an endless loop (never returns).
void nopLoop3() { // measured: 150 MHz toggle rate = 4 cycles per iteration (same as with 0 NOPs)
Serial.println("nopLoop3()...") ;
while(1){
CORE_PIN13_PORTSET = CORE_PIN13_BITMASK;
// asm volatile so the compiler emits every NOP instead of discarding them.
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK;
}
}
// Toggle loop with 4 NOPs between the port-set and port-clear writes.
// Prints its name once, then toggles pin 13 in an endless loop (never returns).
void nopLoop4() { // measured: 150 MHz toggle rate = 4 cycles per iteration (same as with 0 NOPs)
Serial.println("nopLoop4()...") ;
while(1){
CORE_PIN13_PORTSET = CORE_PIN13_BITMASK;
// asm volatile so the compiler emits every NOP instead of discarding them.
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK;
}
}
// Toggle loop with 5 NOPs between the port-set and port-clear writes.
// Prints its name once, then toggles pin 13 in an endless loop (never returns).
// This is the variant that setup() currently calls.
void nopLoop5() { // measured: 150 MHz toggle rate = 4 cycles per iteration (same as with 0 NOPs)
Serial.println("nopLoop5()...") ;
while(1){
CORE_PIN13_PORTSET = CORE_PIN13_BITMASK;
// asm volatile so the compiler emits every NOP instead of discarding them.
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK;
}
}
// Toggle loop with 6 NOPs between the port-set and port-clear writes.
// Prints its name once, then toggles pin 13 in an endless loop (never returns).
void nopLoop6() { // measured: iterations are a mix of 4 and 5 cycles (cause not yet explained)
Serial.println("nopLoop6()...") ;
while(1){
CORE_PIN13_PORTSET = CORE_PIN13_BITMASK;
// asm volatile so the compiler emits every NOP instead of discarding them.
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK;
}
}
// Toggle loop with 7 NOPs between the port-set and port-clear writes.
// Prints its name once, then toggles pin 13 in an endless loop (never returns).
void nopLoop7() { // measured: 120 MHz toggle rate = 5 cycles per iteration at the 600 MHz clock
Serial.println("nopLoop7()...") ;
while(1){
CORE_PIN13_PORTSET = CORE_PIN13_BITMASK;
// asm volatile so the compiler emits every NOP instead of discarding them.
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
asm volatile("nop");
CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK;
}
}
// Main-loop hook of the sketch. Deliberately empty: setup() ends by calling a
// toggle loop that spins forever, so all of the work happens there.
void loop() {
  // Nothing to do.
}
The compiler inserts the NOP instructions as given by the source code. The generated code for nopLoop5() for example looks like this:
Code:
void nopLoop5() { // 150MHz = 4 cycles
a0: b508 push {r3, lr}
Serial.println("nopLoop5()...") ;
a2: 4908 ldr r1, [pc, #32] ; (c4 <nopLoop5()+0x24>)
a4: 4808 ldr r0, [pc, #32] ; (c8 <nopLoop5()+0x28>)
a6: f7ff ffe9 bl 7c <Print::println(char const*)>
while(1){
CORE_PIN13_PORTSET = CORE_PIN13_BITMASK;
aa: 4a08 ldr r2, [pc, #32] ; (cc <nopLoop5()+0x2c>)
ac: 2308 movs r3, #8
ae: f8c2 3084 str.w r3, [r2, #132] ; 0x84
asm volatile("nop");
b2: bf00 nop
asm volatile("nop");
b4: bf00 nop
asm volatile("nop");
b6: bf00 nop
asm volatile("nop");
b8: bf00 nop
asm volatile("nop");
ba: bf00 nop
CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK;
bc: f8c2 3088 str.w r3, [r2, #136] ; 0x88
c0: e7f5 b.n ae <nopLoop5()+0xe>
c2: bf00 nop
c4: 20000050 .word 0x20000050
c8: 20000464 .word 0x20000464
cc: 42004000 .word 0x42004000
Code:
n execution frequency/time
0 150 MHz 4 cycles
1 150 MHz 4 cycles
2 150 MHz 4 cycles
3 150 MHz 4 cycles
4 150 MHz 4 cycles
5 150 MHz 4 cycles
6 4/5 cycles
7 120 MHz 5 cycles
Is there a simple explanation for this behaviour?