A simple class for sub microsecond timing

mlu

Well-known member
Using elapsedMillis and elapsedMicros as a template, I wrote the following class, elapsedCycles, which uses DWT_CYCCNT as its clock.

Code:
#ifndef elapsedCycles_h
#define elapsedCycles_h
#ifdef __cplusplus

#if ARDUINO >= 100
#include "Arduino.h"
#else
#include "WProgram.h"
#endif

/* Class for CPU cycle resolution timing, built on the ARM DWT cycle counter. */
/* Conversions between cycles and nanoseconds use a 64-bit intermediate, so the
   multiplication cannot overflow and F_CPU values that are not a multiple of
   8 MHz are handled exactly; only the returned value is limited to the range
   of unsigned long. */

class elapsedCycles
{
private:
	/* Value of ARM_DWT_CYCCNT that corresponds to "zero cycles elapsed". */
	unsigned long cyccount;
	/* Enable the trace block (TRCENA) and then the cycle counter (bit 0 of DWT_CTRL). */
	static void enableCounter(void) { ARM_DEMCR |= ARM_DEMCR_TRCENA; ARM_DWT_CTRL |= 1; }
public:
	/* Start with zero elapsed cycles. */
	elapsedCycles(void) { enableCounter(); cyccount = ARM_DWT_CYCCNT; }
	/* Start as if val cycles had already elapsed. The trace block is enabled here
	   as well, so this works even when no default-constructed object exists yet. */
	elapsedCycles(unsigned long val) { enableCounter(); cyccount = ARM_DWT_CYCCNT - val; }
	/* Share the reference point of orig (orig's constructor already enabled the trace block). */
	elapsedCycles(const elapsedCycles &orig) { ARM_DWT_CTRL |= 1; cyccount = orig.cyccount; }
	/* Elapsed cycles since the reference point; unsigned subtraction handles counter wraparound. */
	operator unsigned long () const { return ARM_DWT_CYCCNT - cyccount; }
	elapsedCycles & operator = (const elapsedCycles &rhs) { cyccount = rhs.cyccount; return *this; }
	/* Set the elapsed value to val cycles, measured from now. */
	elapsedCycles & operator = (unsigned long val) { cyccount = ARM_DWT_CYCCNT - val; return *this; }
	/* Moving the reference point forward reduces the elapsed value, and vice versa. */
	elapsedCycles & operator -= (unsigned long val)      { cyccount += val ; return *this; }
	elapsedCycles & operator += (unsigned long val)      { cyccount -= val ; return *this; }
	elapsedCycles operator - (int val) const           { elapsedCycles r(*this); r.cyccount += val; return r; }
	elapsedCycles operator - (unsigned int val) const  { elapsedCycles r(*this); r.cyccount += val; return r; }
	elapsedCycles operator - (long val) const          { elapsedCycles r(*this); r.cyccount += val; return r; }
	elapsedCycles operator - (unsigned long val) const { elapsedCycles r(*this); r.cyccount += val; return r; }
	elapsedCycles operator + (int val) const           { elapsedCycles r(*this); r.cyccount -= val; return r; }
	elapsedCycles operator + (unsigned int val) const  { elapsedCycles r(*this); r.cyccount -= val; return r; }
	elapsedCycles operator + (long val) const          { elapsedCycles r(*this); r.cyccount -= val; return r; }
	elapsedCycles operator + (unsigned long val) const { elapsedCycles r(*this); r.cyccount -= val; return r; }
	/* Nanoseconds -> CPU cycles, rounded down: ns * F_CPU / 1e9 computed in 64 bits. */
	static unsigned long ns2cycles(unsigned long ns) {
		return static_cast<unsigned long>((static_cast<unsigned long long>(ns) * static_cast<unsigned long long>(F_CPU)) / 1000000000ULL);
	}
	/* CPU cycles -> nanoseconds, rounded down: cycles * 1e9 / F_CPU computed in 64 bits. */
	static unsigned long cycles2ns(unsigned long cycles) {
		return static_cast<unsigned long>((static_cast<unsigned long long>(cycles) * 1000000000ULL) / static_cast<unsigned long long>(F_CPU));
	}
};

#endif // __cplusplus
#endif // elapsedCycles_h

To test it I wrote a very simple sketch with a function that bit-bangs a WS2812 chain. Yes, I know there is OctoWS2811, but this is a demo of the timing functions. An elapsedCycles object is also used to time the call to the ws2812b_send function.

Code:
#include <elapsedCycles.h>

// LED pin number and the level that loop() writes to it next.
int led = 13;
int onoff = 0;

// cyccnt times a single ws2812b_send() call; loopcyc paces the 100 ms schedule in loop().
elapsedCycles cyccnt;
elapsedCycles loopcyc(0);

// Pin that drives the WS2812 data line.
#define WL2812PIN 12
// Timer restarted at the beginning of every bit inside ws2812b_send().
elapsedCycles bittime;

// WS2812 timings in CPU cycles, computed as ns * (F_CPU / 8 MHz) / 125:
//   T0H  - high time for a 0 bit (350 ns)
//   T1H  - high time for a 1 bit (850 ns)
//   TPER - total period of one bit (1250 ns)
//   TRET - wait after the pixel data has been sent (50000 ns)
#define T0H  ((350*(F_CPU/8000000UL))/125UL)
#define T1H  ((850*(F_CPU/8000000UL))/125UL)
#define TPER ((1250*(F_CPU/8000000UL))/125UL)
#define TRET ((50000*(F_CPU/8000000UL))/125UL)

/*
 * Bit-bang pixel data to the WS2812 chain on WL2812PIN.
 *
 * rgbarray: one value per pixel; bits 23..0 are sent, most significant bit first.
 * count:    number of entries in rgbarray to send.
 *
 * Interrupts are disabled while the pixel bits are clocked out and re-enabled
 * before the final wait, which holds the line low for at least TRET cycles.
 */
void ws2812b_send(uint32_t * rgbarray, int count) {
  __disable_irq();
  for (int n = 0; n<count; n++) {
    uint32_t rgb = rgbarray[n];
    for (int bitpos=0; bitpos<24; bitpos++) {
      bittime = 0;
      digitalWriteFast(WL2812PIN, 1);
      // The length of the high pulse encodes the bit value.
      const unsigned long hightime = (rgb & 0x800000) ? T1H : T0H;
      while (bittime<hightime) ;
      digitalWriteFast(WL2812PIN, 0);
      rgb = (rgb<<1);
      // Stay low for the remainder of the bit period.
      while (bittime<TPER) ;
    }
  }
  // Restart the timer so the wait below is a full TRET measured from the end
  // of the data, not from the start of the last bit (and is well defined
  // when count is 0).
  bittime = 0;
  __enable_irq();
  while(bittime<TRET) ;
}

uint32_t grbarray[3];

void setup() {
  // put your setup code here, to run once:
  pinMode(led, OUTPUT);
  Serial.begin(9600); 
  grbarray[0] = 0xA50F4C;
  grbarray[1] = 0x808080;
  grbarray[2] = 0x204080;
  pinMode(WL2812PIN, OUTPUT);
  digitalWriteFast(WL2812PIN, 0);
  delay(2000);
  Serial.println(T0H);
  Serial.println(T1H);
}

int tick=0;

void loop() {
  // put your main code here, to run repeatedly:
  /* Number of cpu cycles in 100 ms */
  uint32_t cycles_100ms = elapsedCycles::ns2cycles(100000000);
    if (loopcyc>cycles_100ms) {
      tick++;
      loopcyc -= cycles_100ms;
      if (tick==5) {
        tick = 0;
        Serial.print("ledtick: ");
        Serial.println(loopcyc);
        digitalWrite(led, onoff);
        onoff=1-onoff;
      }
      cyccnt = 0;
      ws2812b_send(grbarray, 3);
      uint32_t cycles = cyccnt;
      Serial.print("Sending 3 WS2812 pixels in ");
      Serial.print(cycles);
      Serial.print(" clock cycles taking ");
      Serial.print(elapsedCycles::cycles2ns(cycles));
      Serial.println(" nano seconds");
    }
}

EDIT: Fixed some errors in the example sketch that gave wrong color values (one rgb = (rgb<<1) shift too many).
 
Last edited:
Thanks - I was searching for ARM_DWT_CYCCNT while thinking of writing something similar starting from elapsedMicros, and found this :) My first test looks cleaner and runs faster.
 
Using elapsedMillis and elapsedMicros as a template, I wrote the following class, elapsedCycles, which uses DWT_CYCCNT as its clock.

Code:
#ifndef elapsedCycles_h
#define elapsedCycles_h
#ifdef __cplusplus

#if ARDUINO >= 100
#include "Arduino.h"
#else
#include "WProgram.h"
#endif

/* Class for CPU cycle resolution timing, built on the ARM DWT cycle counter. */
/* Conversions between cycles and nanoseconds use a 64-bit intermediate, so the
   multiplication cannot overflow and F_CPU values that are not a multiple of
   8 MHz are handled exactly; only the returned value is limited to the range
   of unsigned long. */

class elapsedCycles
{
private:
	/* Value of ARM_DWT_CYCCNT that corresponds to "zero cycles elapsed". */
	unsigned long cyccount;
	/* Enable the trace block (TRCENA) and then the cycle counter (bit 0 of DWT_CTRL). */
	static void enableCounter(void) { ARM_DEMCR |= ARM_DEMCR_TRCENA; ARM_DWT_CTRL |= 1; }
public:
	/* Start with zero elapsed cycles. */
	elapsedCycles(void) { enableCounter(); cyccount = ARM_DWT_CYCCNT; }
	/* Start as if val cycles had already elapsed. The trace block is enabled here
	   as well, so this works even when no default-constructed object exists yet. */
	elapsedCycles(unsigned long val) { enableCounter(); cyccount = ARM_DWT_CYCCNT - val; }
	/* Share the reference point of orig (orig's constructor already enabled the trace block). */
	elapsedCycles(const elapsedCycles &orig) { ARM_DWT_CTRL |= 1; cyccount = orig.cyccount; }
	/* Elapsed cycles since the reference point; unsigned subtraction handles counter wraparound. */
	operator unsigned long () const { return ARM_DWT_CYCCNT - cyccount; }
	elapsedCycles & operator = (const elapsedCycles &rhs) { cyccount = rhs.cyccount; return *this; }
	/* Set the elapsed value to val cycles, measured from now. */
	elapsedCycles & operator = (unsigned long val) { cyccount = ARM_DWT_CYCCNT - val; return *this; }
	/* Moving the reference point forward reduces the elapsed value, and vice versa. */
	elapsedCycles & operator -= (unsigned long val)      { cyccount += val ; return *this; }
	elapsedCycles & operator += (unsigned long val)      { cyccount -= val ; return *this; }
	elapsedCycles operator - (int val) const           { elapsedCycles r(*this); r.cyccount += val; return r; }
	elapsedCycles operator - (unsigned int val) const  { elapsedCycles r(*this); r.cyccount += val; return r; }
	elapsedCycles operator - (long val) const          { elapsedCycles r(*this); r.cyccount += val; return r; }
	elapsedCycles operator - (unsigned long val) const { elapsedCycles r(*this); r.cyccount += val; return r; }
	elapsedCycles operator + (int val) const           { elapsedCycles r(*this); r.cyccount -= val; return r; }
	elapsedCycles operator + (unsigned int val) const  { elapsedCycles r(*this); r.cyccount -= val; return r; }
	elapsedCycles operator + (long val) const          { elapsedCycles r(*this); r.cyccount -= val; return r; }
	elapsedCycles operator + (unsigned long val) const { elapsedCycles r(*this); r.cyccount -= val; return r; }
	/* Nanoseconds -> CPU cycles, rounded down: ns * F_CPU / 1e9 computed in 64 bits. */
	static unsigned long ns2cycles(unsigned long ns) {
		return static_cast<unsigned long>((static_cast<unsigned long long>(ns) * static_cast<unsigned long long>(F_CPU)) / 1000000000ULL);
	}
	/* CPU cycles -> nanoseconds, rounded down: cycles * 1e9 / F_CPU computed in 64 bits. */
	static unsigned long cycles2ns(unsigned long cycles) {
		return static_cast<unsigned long>((static_cast<unsigned long long>(cycles) * 1000000000ULL) / static_cast<unsigned long long>(F_CPU));
	}
};

#endif // __cplusplus
#endif // elapsedCycles_h

To test it I wrote a very simple sketch with a function that bit-bangs a WS2812 chain. Yes, I know there is OctoWS2811, but this is a demo of the timing functions. An elapsedCycles object is also used to time the call to the ws2812b_send function.

Code:
#include <elapsedCycles.h>

// LED pin number and the level that loop() writes to it next.
int led = 13;
int onoff = 0;

// cyccnt times a single ws2812b_send() call; loopcyc paces the 100 ms schedule in loop().
elapsedCycles cyccnt;
elapsedCycles loopcyc(0);

// Pin that drives the WS2812 data line.
#define WL2812PIN 12
// Timer restarted at the beginning of every bit inside ws2812b_send().
elapsedCycles bittime;

// WS2812 timings in CPU cycles, computed as ns * (F_CPU / 8 MHz) / 125:
//   T0H  - high time for a 0 bit (350 ns)
//   T1H  - high time for a 1 bit (850 ns)
//   TPER - total period of one bit (1250 ns)
//   TRET - wait after the pixel data has been sent (50000 ns)
#define T0H  ((350*(F_CPU/8000000UL))/125UL)
#define T1H  ((850*(F_CPU/8000000UL))/125UL)
#define TPER ((1250*(F_CPU/8000000UL))/125UL)
#define TRET ((50000*(F_CPU/8000000UL))/125UL)

/*
 * Bit-bang pixel data to the WS2812 chain on WL2812PIN.
 *
 * rgbarray: one value per pixel; bits 23..0 are sent, most significant bit first.
 * count:    number of entries in rgbarray to send.
 *
 * Interrupts are disabled while the pixel bits are clocked out and re-enabled
 * before the final wait, which holds the line low for at least TRET cycles.
 */
void ws2812b_send(uint32_t * rgbarray, int count) {
  __disable_irq();
  for (int n = 0; n<count; n++) {
    uint32_t rgb = rgbarray[n];
    for (int bitpos=0; bitpos<24; bitpos++) {
      bittime = 0;
      digitalWriteFast(WL2812PIN, 1);
      // The length of the high pulse encodes the bit value.
      const unsigned long hightime = (rgb & 0x800000) ? T1H : T0H;
      while (bittime<hightime) ;
      digitalWriteFast(WL2812PIN, 0);
      rgb = (rgb<<1);
      // Stay low for the remainder of the bit period.
      while (bittime<TPER) ;
    }
  }
  // Restart the timer so the wait below is a full TRET measured from the end
  // of the data, not from the start of the last bit (and is well defined
  // when count is 0).
  bittime = 0;
  __enable_irq();
  while(bittime<TRET) ;
}

uint32_t grbarray[3];

void setup() {
  // put your setup code here, to run once:
  pinMode(led, OUTPUT);
  Serial.begin(9600); 
  grbarray[0] = 0xA50F4C;
  grbarray[1] = 0x808080;
  grbarray[2] = 0x204080;
  pinMode(WL2812PIN, OUTPUT);
  digitalWriteFast(WL2812PIN, 0);
  delay(2000);
  Serial.println(T0H);
  Serial.println(T1H);
}

int tick=0;

void loop() {
  // put your main code here, to run repeatedly:
  /* Number of cpu cycles in 100 ms */
  uint32_t cycles_100ms = elapsedCycles::ns2cycles(100000000);
    if (loopcyc>cycles_100ms) {
      tick++;
      loopcyc -= cycles_100ms;
      if (tick==5) {
        tick = 0;
        Serial.print("ledtick: ");
        Serial.println(loopcyc);
        digitalWrite(led, onoff);
        onoff=1-onoff;
      }
      cyccnt = 0;
      ws2812b_send(grbarray, 3);
      uint32_t cycles = cyccnt;
      Serial.print("Sending 3 WS2812 pixels in ");
      Serial.print(cycles);
      Serial.print(" clock cycles taking ");
      Serial.print(elapsedCycles::cycles2ns(cycles));
      Serial.println(" nano seconds");
    }
}

EDIT: Fixed some errors in the example sketch that gave wrong color values (one rgb = (rgb<<1) shift too many).

Hi mlu,

I want to use your code in an open source project (under the Apache license, if this is OK?). How do you want to be credited?
 
Back
Top