loop execution time with dual-issue disabled Teensy 4.0

Status
Not open for further replies.

ossi

Well-known member
I am currently playing around with the Teensy 4.0 and want to understand its architectural behaviour. To inspect "dual issue" I created a program (complete listing below) that contains the following loop:

Code:
  // Timed loop body: per iteration, one write each to the pin-13 PORTSET and
  // PORTCLEAR registers plus three integer updates (xx, vv, m).
  for( k=0 ; k<nn ; k++){
      CORE_PIN13_PORTSET = CORE_PIN13_BITMASK; // led-1
      CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK; // led-2
      xx *= 105529 ;
      vv += m & 0x1234 ;
      m +=17 ;
      }

In assembly code this is:

Code:
// r9=105529 ; r3=m ; r5=v ; r6=xx
//     1d8:  f241 2234  movw  r2, #4660      ; 0x1234  ; r2=0x1234
//     1dc: f8c8 7084   str.w r7, [r8, #132]   ; 0x84   CORE_PIN13_PORTSET = CORE_PIN13_BITMASK; // led-1
//     1e0: fb09 f606   mul.w r6, r9, r6         ;  xx *= 105529 ;
//     1e4: 401a        ands  r2, r3                 ; r2= m & 0x1234 ; r2=1234h r3=m
//     1e6: 3311        adds  r3, #17              ;  m +=17 ;
//     1e8: f8c8 7088   str.w r7, [r8, #136]   ; 0x88 CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK; // led-2
//    for( k=0 ; k<256 ; k++){
//     1ec: 4299        cmp r1, r3                   ; loop end test r1 <> m
//     1ee: 4415        add r5, r2                    ; r5 += m & 0x1234 ; r5=v
//     1f0: d1f2        bne.n 1d8 <loop+0x1c> ; loop go on

The cycle measurement now gives the following result: With dual issue enabled (standard setting) a loop needs 5 cycles.
With dual issue disabled

Code:
// Sets the DISDI and DISISSCH1 bit fields of the ACTLR register and prints the
// register value before and after the change on Serial.
//
// NOTE(review): these two fields alone reportedly leave some dual issue
// active; ACTLR.DISFOLD (bit 2) may be required to switch it off completely —
// confirm against the Cortex-M7 TRM.
void disableDualIssue(){
  // ACTLR is a memory-mapped hardware register: go through a volatile,
  // unsigned 32-bit pointer so that every read and write below is really
  // performed and the masks are applied without signed-integer conversions.
  volatile uint32_t* const actlr = reinterpret_cast<volatile uint32_t*>(ACTLR) ;
  const uint32_t disDiMask     = 0x001F0000 ; // DISDI, bits [20:16]
  const uint32_t disIssCh1Mask = 0x03E00000 ; // DISISSCH1, bits [25:21]

  Serial.println("disableDualIssue()") ;
  Serial.printf("before: ACTLR=%08XH\n",static_cast<unsigned>(*actlr)) ;
  *actlr |= disDiMask ;
  *actlr |= disIssCh1Mask ;
  Serial.printf("after:  ACTLR=%08XH\n",static_cast<unsigned>(*actlr)) ;
  }
the loop still needs only 6 cycles. The question is: How can the Teensy execute 9 instructions in 6 cycles without dual issuing?

To measure cycles I use the internal cycle counter and the signal on the LED pin.
The complete program is:
Code:
// Pin number of the LED output; configured as OUTPUT in setup().
int led = 13;
// Scale factors used in the printouts: Hz -> MHz and seconds -> nanoseconds.
#define MHz 1e6
#define ns 1e-9

// Declared here, defined outside this listing; setClock100MHz() calls it with
// the requested frequency in Hz.
extern "C" uint32_t set_arm_clock(uint32_t frequency);

// Requests a 100 MHz core clock through set_arm_clock() and prints
// F_CPU_ACTUAL before and after the call, followed by F_CPU_ACTUAL/4 and the
// period of one clock cycle in nanoseconds.
void setClock100MHz(){
  const int targetHz = 100000000 ;

  // Prints the current F_CPU_ACTUAL in MHz; called before and after the switch.
  const auto reportActualClock = [](){
    Serial.printf("F_CPU_ACTUAL  =%8.4f MHz\n",F_CPU_ACTUAL/MHz);
    } ;

  reportActualClock() ;
  Serial.printf("set_arm_clock (%8.4f MHz)\n",targetHz/MHz);
  set_arm_clock(targetHz);
  reportActualClock() ;

  // Derived values: quarter-rate clock and the duration of a single cycle.
  Serial.printf("F_CPU_ACTUAL/4=%8.4f MHz\n",F_CPU_ACTUAL/4/MHz);
  Serial.printf("1/F_CPU_ACTUAL=%8.4f ns\n",1.0/F_CPU_ACTUAL/ns);
  }

// Pointer to address 0xE000E008, dereferenced as the ACTLR register by
// disableDualIssue().
// NOTE(review): this is a plain int* (not volatile, signed) although it is used
// for a hardware register — confirm this is intended.
#define ACTLR ((int*) 0xE000E008) 

// Sets the DISDI and DISISSCH1 bit fields of the ACTLR register and prints the
// register value before and after the change on Serial.
//
// NOTE(review): these two fields alone reportedly leave some dual issue
// active; ACTLR.DISFOLD (bit 2) may be required to switch it off completely —
// confirm against the Cortex-M7 TRM.
void disableDualIssue(){
  // ACTLR is a memory-mapped hardware register: go through a volatile,
  // unsigned 32-bit pointer so that every read and write below is really
  // performed and the masks are applied without signed-integer conversions.
  volatile uint32_t* const actlr = reinterpret_cast<volatile uint32_t*>(ACTLR) ;
  const uint32_t disDiMask     = 0x001F0000 ; // DISDI, bits [20:16]
  const uint32_t disIssCh1Mask = 0x03E00000 ; // DISISSCH1, bits [25:21]

  Serial.println("disableDualIssue()") ;
  Serial.printf("before: ACTLR=%08XH\n",static_cast<unsigned>(*actlr)) ;
  *actlr |= disDiMask ;
  *actlr |= disIssCh1Mask ;
  Serial.printf("after:  ACTLR=%08XH\n",static_cast<unsigned>(*actlr)) ;
  }
    
void setup() {
  pinMode(led, OUTPUT);
  Serial.begin(115200);  
  while(!Serial){} ;
  // Enable CPU Cycle Count
  ARM_DEMCR |= ARM_DEMCR_TRCENA;
  ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
  
 // setClock100MHz() ;
  disableDualIssue() ;
  delay(2000) ;
  Serial.println("teensy40startSequenceOfLoop1...") ;
  delay(200) ;
  }

// Cycle-counter snapshots taken immediately before and after the timed loop in
// loop(), and their difference.
int cyclesStart ;
int cyclesStop ;
int cycles ;

// Measurement routine. Never returns: each pass of the outer while(1) times nn
// iterations of the inner loop with ARM_DWT_CYCCNT, prints the variables and
// the total / per-iteration cycle counts, then waits one second.
void loop(){
  int xx=42 ;
  int nn=256 ; // number of inner-loop iterations per measurement
  int k ;
  int m ;
  m=0 ;
  int vv=0 ;
  // xx, m and vv are initialised once above and keep accumulating across all
  // measurements because the while(1) below never exits.
  // NOTE(review): xx is a signed int multiplied on every iteration, so it will
  // leave the int range; signed overflow is undefined behaviour in C++ —
  // confirm that wraparound is what is expected here.
  while(1){  // 5 cycles
    cyclesStart = ARM_DWT_CYCCNT ; // counter snapshot just before the timed loop
    for( k=0 ; k<nn ; k++){
      CORE_PIN13_PORTSET = CORE_PIN13_BITMASK; // led-1
      CORE_PIN13_PORTCLEAR = CORE_PIN13_BITMASK; // led-2
      xx *= 105529 ;
      vv += m & 0x1234 ;
      m +=17 ;
      }
    cyclesStop = ARM_DWT_CYCCNT ; // counter snapshot just after the timed loop
   // k++ passes the current k (equal to nn here); the increment itself is
   // discarded because the for loop resets k to 0 on the next pass.
   Serial.printf("k=%8i xx=%8i m=%8i vv=%8i \n",k++,xx,m,vv) ; 
   cycles=cyclesStop-cyclesStart ; 
   // cycles/nn is an integer division, so the per-iteration figure is truncated.
   Serial.printf("cyclesNeeded=%d cycles per loop=%d \n",cycles,cycles/nn) ;
   delay(1000) ;
   //Serial.println("abort...") ;while(1){} ;
   }  
  }
 
The question is: How can the teensy execute 9 instructions in 6 cycles without dual issuing?

Apparently setting those bits in ACTLR doesn't disable dual issue for all instructions. There is a bit, DISFOLD (bit 2), that completely disables dual issue. After setting it, the example takes 9 cycles.

Replace
Code:
  (*ACTLR) |= 0x001F0000 ; // DISDI
  (*ACTLR) |= 0x03E00000 ; // DISISSCH1
with
Code:
  (*ACTLR) |= 0x00000004 ; // DISFOLD
 
Status
Not open for further replies.
Back
Top