Usage of FTM on Teensy LC

Hello,
I want to use a library to generate XY2-100 signals to control a galvanometer.
It uses the FlexTimer module (FTM) to trigger the DMA that writes the data.
The library has two implementations, for the Teensy LC and the Teensy 3.2. This is the link to the library.

https://github.com/Tuet/XY2_100

The problem is that only the implementation for the Teensy 3.2 works.
The only difference between the two implementations is in the FTM timer setup/control and the trigger setting for the DMA.
Here are both code pieces where the difference is. The define for the Teensy LC is __MKL26Z64__.
Code:
// Initializes the output path: clears both frame buffers, configures the
// eight PORTD pins, sets up the DMA channel that copies buffer bytes to
// GPIOD_PDOR, and starts FTM2 as the pacing timer.
// The timer/trigger setup is chosen at compile time:
//   __MK20DX256__ : DMA is requested by FTM2 channel 0 (DMAMUX_SOURCE_FTM2_CH0)
//   __MKL26Z64__  : DMA is requested by the FTM2 overflow (DMAMUX_SOURCE_FTM2_OV)
// NOTE(review): noInterrupts() is only called inside the two #if branches,
// while interrupts() at the end runs unconditionally.
void XY2_100::begin(void)
{
	uint32_t bufsize, frequency;
	bufsize = 40; // bytes per DMA pass; isr() reloads the buffers with the same length

	// set up the buffers
	memset(pingBuffer, 0, bufsize);
	memset(pongBuffer, 0, bufsize);

	// configure the 8 output pins
	// (the port output registers are written first so the pins start from a known state)
	GPIOD_PCOR = 0xFF;
  GPIOD_PDOR = 0x0F;
	pinMode(2, OUTPUT);	 // bit 0
	pinMode(14, OUTPUT); // bit 1
	pinMode(7, OUTPUT);  // bit 2
	pinMode(8, OUTPUT);  // bit 3
	pinMode(6, OUTPUT);  // bit 4
	pinMode(20, OUTPUT); // bit 5
	pinMode(21, OUTPUT); // bit 6
	pinMode(5, OUTPUT);  // bit 7

	// timer rate in Hz; one DMA byte is written per timer event
	frequency = 4000000;
  
	// DMA channel writes the data
	dma.sourceBuffer((uint8_t *)pingBuffer, bufsize);
	dma.destination(GPIOD_PDOR);
	dma.transferSize(1);
	dma.transferCount(bufsize);
	dma.disableOnCompletion();
  dma.interruptAtCompletion();
  
	pinMode(9, OUTPUT); // testing: oscilloscope trigger

#if defined(__MK20DX256__)  
  // TEENSY 3.1/3.2
	FTM2_SC = 0;
	FTM2_CNT = 0;
	// timer ticks per output period, rounded to the nearest integer
	uint32_t mod = (F_BUS + frequency / 2) / frequency;
	FTM2_MOD = mod - 1; // 11 @96Mhz, 8 @72MHz
	FTM2_SC = FTM_SC_CLKS(1) | FTM_SC_PS(0); // increment on every TPM clock, prescaler 1
  
  // need ISR also
  FTM2_C0SC = 0x69;  // MSB:MSA 10, ELSB:ELSA 10, DMA on
	FTM2_C0V = (mod * 128) >> 8;  // 256 = 100% of the time

  // route the timer interrupt to trigger the dma channel
  dma.triggerAtHardwareEvent(DMAMUX_SOURCE_FTM2_CH0);
  // enable a done interrupts when channel completes
	dma.attachInterrupt(isr);

	// stop the timer, flush any stale channel event, then restart timer and DMA together
	FTM2_C0SC = 0x28;
  noInterrupts();
	FTM2_SC = 0;             // stop FTM2 timer (hopefully before it rolls over)
	FTM2_CNT = 0;

	//PORTB_ISFR = (1<<18);    // clear any prior rising edge
	uint32_t tmp __attribute__((unused));
	FTM2_C0SC = 0x28;
	tmp = FTM2_C0SC;         // clear any prior timer DMA triggers
	FTM2_C0SC = 0x69;
  dma.enable();
	FTM2_SC = FTM_SC_CLKS(1) | FTM_SC_PS(0); // restart FTM2 timer
  
#elif defined(__MKL26Z64__)
  // TEENSY LC
	FTM2_SC = 0;
	FTM2_CNT = 0;
	// timer ticks per output period (truncating division, based on F_CPU)
	uint32_t mod = F_CPU / frequency;
	FTM2_MOD = mod - 1;
	FTM2_SC = FTM_SC_CLKS(1) | FTM_SC_PS(0); // increment on every TPM clock, prescaler 1
  
	// route the timer interrupt to trigger the dma channel
	dma.triggerAtHardwareEvent(DMAMUX_SOURCE_FTM2_OV);
  // enable a done interrupts when channel completes
	dma.attachInterrupt(isr);

	uint32_t sc __attribute__((unused)) = FTM2_SC;  
	noInterrupts();
	FTM2_SC = 0;		// stop FTM2 timer (hopefully before it rolls over)
	// re-arm the DMA channel from the start of the ping buffer
	dma.clearComplete();
	dma.transferCount(bufsize);
	dma.sourceBuffer((uint8_t *)pingBuffer, bufsize);
	// clear any pending event flags
	FTM2_SC = FTM_SC_TOF;
	dma.enable();		// enable DMA channel
	FTM2_CNT = 0; // writing any value resets counter
	FTM2_SC = FTM_SC_DMA | FTM_SC_CLKS(1) | FTM_SC_PS(0);
#endif  

	//digitalWriteFast(9, LOW);
	interrupts();
}

Code:
// DMA-complete interrupt handler: acknowledges the DMA interrupt, optionally
// points the channel at the ping or pong buffer, then stops, resets and
// restarts FTM2 together with the DMA channel.
// txPing bits used here: bit 1 = "buffer switch requested" (cleared by this
// handler), bit 0 = which buffer to send next (set: pong, clear: ping).
// NOTE(review): dma.sourceBuffer() is only called when bit 1 of txPing is set;
// otherwise the channel is re-enabled without reloading its source. Confirm
// that the DMA controller of each target restarts from the buffer start in
// that case.
void XY2_100::isr(void)
{
	//digitalWriteFast(9, LOW);
  
	dma.clearInterrupt();
  if(txPing & 2) {
    txPing &= ~2;
    if(txPing & 1) {
      dma.sourceBuffer((uint8_t *)pongBuffer, 40);
    } else {
      dma.sourceBuffer((uint8_t *)pingBuffer, 40);
    }
  }
  //txPing |= 128;

#if defined(__MK20DX256__)  
  // stop the timer and clear its flags before re-arming channel 0 and the DMA
  FTM2_SC = 0;
  FTM2_SC = FTM_SC_TOF;
  uint32_t tmp __attribute__((unused));
	FTM2_C0SC = 0x28;
	tmp = FTM2_C0SC;         // clear any prior timer DMA triggers
	FTM2_C0SC = 0x69;
  FTM2_CNT = 0;
	dma.enable();		// enable DMA channel
  FTM2_SC = FTM_SC_CLKS(1) | FTM_SC_PS(0); // restart FTM2 timer
#elif defined(__MKL26Z64__)
  // stop the timer, clear the overflow flag, then restart with the DMA request enabled
  FTM2_SC = 0;
  FTM2_SC = FTM_SC_TOF;
	dma.enable();		// enable DMA channel
	FTM2_CNT = 0; // writing any value resets counter 
	FTM2_SC = FTM_SC_DMA | FTM_SC_CLKS(1) | FTM_SC_PS(0);  
#endif  

 	//digitalWriteFast(9, HIGH); // oscilloscope trigger 
}

I tried making it work and I also found this thread
https://forum.pjrc.com/threads/54949-Teensy-LC-Trigger-DMA-with-timer
and tried implementing the solution from the comment, but it didn't really work.
This is my version.
Code:
// Experimental variant of begin(): same buffer, pin and DMA setup as the
// library version, but the timer part (compiled only when `testmode` is
// defined) uses FTM1 channel 1 as the DMA request source.
// NOTE(review): this listing ends at the #endif; the function's closing brace
// is not part of it.
void XY2_100::begin(void)
{
	uint32_t bufsize, frequency;
	bufsize = 40;

	// set up the buffers
	memset(pingBuffer, 0, bufsize);
	memset(pongBuffer, 0, bufsize);

	// configure the 8 output pins
	GPIOD_PCOR = 0xFF;
  GPIOD_PDOR = 0x0F;
	pinMode(2, OUTPUT);	 // bit 0
	pinMode(14, OUTPUT); // bit 1
	pinMode(7, OUTPUT);  // bit 2
	pinMode(8, OUTPUT);  // bit 3
	pinMode(6, OUTPUT);  // bit 4
	pinMode(20, OUTPUT); // bit 5
	pinMode(21, OUTPUT); // bit 6
	pinMode(5, OUTPUT);  // bit 7

	// NOTE(review): assigned but not used below; FTM1_MOD is hard-coded instead
	frequency = 100000;
  
	// DMA channel writes the data
  dma.begin(true);
	dma.sourceBuffer((uint8_t *)pingBuffer, bufsize);
	dma.destination(GPIOD_PDOR);
	dma.transferSize(1);
	dma.transferCount(bufsize);
	dma.disableOnCompletion();
  dma.interruptAtCompletion();
  
	pinMode(9, OUTPUT); // testing: oscilloscope trigger

#if defined(testmode)  
  // TEENSY LC
	dma.triggerAtHardwareEvent(DMAMUX_SOURCE_FTM1_CH1);
  dma.attachInterrupt(isr);
	dma.enable();		// enable DMA channel

  SIM_SCGC6 |= SIM_SCGC6_TPM1;
  FTM1_SC = 0; delay(1);
	FTM1_CNT = 0;
	// fixed period and compare value; the frequency-based formulas are commented out
	FTM1_MOD = 0x3F00;//F_CPU/frequency - 1;
  //FTM1_MOD = (F_PLL/2)/frequency - 1;
	FTM1_C1V = 0x3F00;
  FTM1_SC =   FTM_SC_CLKS(1) | FTM_SC_PS(0) ;
	// read-modify-write: only the DMA bit is added to whatever C1SC already holds
	FTM1_C1SC |= FTM_CSC_DMA;
#endif

Code:
// DMA-complete interrupt handler of the experimental FTM1 variant.
// Pin 9 is driven low on entry and high on exit so the handler's run time is
// visible on an oscilloscope.
// txPing bits used here: bit 1 = "buffer switch requested" (cleared by this
// handler), bit 0 = which buffer to send next (set: pong, clear: ping).
void XY2_100::isr(void)
{
	digitalWriteFast(9, LOW);
  
	dma.clearInterrupt();
  if(txPing & 2) {
    txPing &= ~2;
    if(txPing & 1) {
      dma.sourceBuffer((uint8_t *)pongBuffer, 40);
    } else {
      dma.sourceBuffer((uint8_t *)pingBuffer, 40);
    }
  }
  //txPing |= 128;

#if defined(testmode)  
  // DMA request of FTM1 channel 1 is switched off while the channel is
  // re-enabled and the counter reset, then switched back on
  FTM1_C1SC &= ~FTM_CSC_DMA;
  dma.enable();
  FTM1_CNT = 0;
	FTM1_C1SC |= FTM_CSC_DMA;
#endif  

 	digitalWriteFast(9, HIGH); // oscilloscope trigger 
}

The main program I used is "BasicTest.ino" from the library's examples.
To check my results I used pin 9, which is toggled in the ISR that runs after the DMA completes, and also the clock pin 2.
With some FTM1_MOD values it seems to kind of work, but only at a very low frequency.

Maybe I am misunderstanding the code or the timer.
I am using a Teensy LC, the Arduino IDE version 1.8.19 with Teensyduino version 1.56.
The test for the Teensy 3.2 implementation was done by someone from this forum.
https://forum.pjrc.com/threads/70516-Need-help-with-testing-a-Library-for-signal-generation
I would greatly appreciate it if someone could tell me what is wrong with the implementation for the Teensy LC.
Thanks
 
Here is a sketch that demonstrates using TPM2/DMA to set the lower 8 PORTD bits on the Teensy LC (tested with a scope on the PORTD pins).
Code:
//  PORTD  pins 2 14 7 8 6 20 21 5  TPM2

#include <DMAChannel.h>

// Prints a register as "NAME 0x<hex value>" on Serial.
// Wrapped in do { } while (0) so both statements stay together when the macro
// is used as the body of an unbraced if/else or loop.
#define PRREG(x) do { Serial.print(#x" 0x"); Serial.println(x,HEX); } while (0)

#define FREQ 1000

DMAChannel dma(false);

uint8_t bits[] = {
  0b10010010,
  0b10000001,
  0b00000010,
  0b10010001,
  0b10000010,
  0b00000001
};

// Dumps the registers relevant to this test to Serial, one per line, each
// printed by PRREG as its name followed by its value in hex.
void prregs() {
  PRREG(SIM_SCGC6);
  PRREG(DMA_DSR_BCR0);
  PRREG(DMA_DCR0);
  PRREG(TPM2_SC);
  PRREG(TPM2_C1SC);
  PRREG(TPM2_MOD);
}
// Reference test without DMA or timer: writes each pattern of bits[] to
// GPIOD_PDOR with a 1 ms pause between writes, forever. Pin 13 is high while a
// pass over the patterns is in progress. Never returns.
void manual() {
  while (1) {
    digitalWriteFast(13, 1);
    // range-for avoids comparing a signed index against sizeof(bits)
    for (uint8_t pattern : bits) {
      GPIOD_PDOR = pattern;
      delay(1);
    }
    digitalWriteFast(13, 0);
  }
}

// Restarts TPM2 from zero with a period of F_CPU / FREQ counter ticks and
// turns on the DMA request of channel 1 (the source dma_init() listens to).
// Assumes the TPM counter clock runs at F_CPU - TODO confirm.
void timer_init() {
  // tpm2 chnl 1
  TPM2_SC = 0;  // stop the counter while it is reconfigured
  TPM2_CNT = 0;
  TPM2_MOD = F_CPU / FREQ - 1;
  // NOTE(review): this writes channel 0's value register although the DMA
  // request below is enabled on channel 1 - confirm which channel is intended
  TPM2_C0V = 0;
  TPM2_SC =   FTM_SC_CLKS(1) | FTM_SC_PS(0) ;
  // read-modify-write: only the DMA bit is added to whatever C1SC already holds
  TPM2_C1SC |= FTM_CSC_DMA;
}

// Configures the DMA channel to copy bits[] to GPIOD_PDOR, one byte per
// TPM2 channel 1 request, and enables it.
// useisr: non-zero to also request an interrupt when the buffer has been sent
//         and route it to dmaisr(); zero for polled operation.
void dma_init(int useisr) {
  // DMA init
  dma.begin(true);  // presumably allocates the channel declared with DMAChannel dma(false) - confirm against DMAChannel.h
  dma.sourceBuffer(bits, sizeof(bits));
  dma.destination(GPIOD_PDOR);
  dma.transferSize(1);
  // the channel stops after one pass; polling()/dmaisr() re-enable it
  dma.disableOnCompletion();
  dma.triggerAtHardwareEvent(DMAMUX_SOURCE_TPM2_CH1);
  if (useisr) {
    dma.interruptAtCompletion();
    dma.attachInterrupt(dmaisr);
  }
  dma.enable();
}

// Polled test: starts DMA without a completion interrupt, starts the timer,
// prints the registers once, then spins forever re-arming the DMA channel
// each time a pass over bits[] has completed. Never returns.
void polling() {
  dma_init(0);
  timer_init();
  prregs();
  for (;;) {
    if (!dma.complete()) {
      continue;  // current pass still running
    }
    // pass finished: acknowledge, rewind to the start of bits[], restart
    dma.clearInterrupt();
    dma.sourceBuffer(bits, sizeof(bits));
    dma.enable();
  }
}

// DMA-complete interrupt handler: acknowledges the interrupt, points the
// channel back at the start of bits[] and re-enables it for the next pass.
void dmaisr() {
  dma.clearInterrupt();
  dma.sourceBuffer(bits, sizeof(bits));
  dma.enable();
}

// Interrupt-driven test: sets up DMA with the completion interrupt enabled,
// starts the timer and prints the registers once, then returns.
void interrupt()  {
  dma_init(1);
  timer_init();
  prregs();

}
// Arduino entry point: opens the serial port and waits for it, configures the
// LED pin and the eight PORTD pins as outputs, then runs the selected test.
void setup() {
  Serial.begin(9600);
  while (!Serial) {
    // wait for the serial port
  }
  pinMode(13, OUTPUT);

  // PORTD pins, listed in bit order 0..7
  const uint8_t portd_pins[] = {2, 14, 7, 8, 6, 20, 21, 5};
  for (uint8_t pin : portd_pins) {
    pinMode(pin, OUTPUT);
  }

  // select a test
  //manual();
  //polling();
  interrupt();
}

// Intentionally empty: the test selected in setup() does all the work.
void loop() {
}
So the BasicTest example from https://github.com/Tuet/XY2_100 should work on the LC. I've tried changing things in the library's XY2_100.cpp without a lot of success. I think there may be a problem in the ISR's ping/pong logic. For the LC, the ISR only needs to do:
Code:
  dma.clearInterrupt();
  dma.sourceBuffer(bits, sizeof(bits));
  dma.enable();
If I hack an extra dma.sourceBuffer() call into the lib's ISR,
Code:
...
#elif defined(__MKL26Z64__)
  dma.sourceBuffer((uint8_t *)pongBuffer, 40);   // HACK
  FTM2_SC = 0;
  FTM2_SC = FTM_SC_TOF;
	dma.enable();		// enable DMA channel
	FTM2_CNT = 0; // writing any value resets counter 
	FTM2_SC = FTM_SC_DMA | FTM_SC_CLKS(1) | FTM_SC_PS(0);  
#endif 
...
then i start seeing activity on the PORTD pins.

Maybe if you do some more experimenting you can find the fix for the LC ...
 
Re: a better fix?

A better solution is to remove the if(txPing & 2) logic from the isr in the library, so the dma.sourceBuffer() is executed on every DMA ISR.
Code:
 // if(txPing & 2) {
 //   txPing &= ~2;
    if(txPing & 1) {
      dma.sourceBuffer((uint8_t *)pongBuffer, 40);
    } else {
      dma.sourceBuffer((uint8_t *)pingBuffer, 40);
    }
//  }
From scope observations of PORTD pins (i don't have a galvanometer), this fix works on both T3.2 and LC.
 
Thank you for your answer,
you were right, the timer stuff works properly.
And the problem seems to be with the ping pong.
I have implemented a solution similar to yours and the generated signals look good.
There seems to be another problem: the clock runs at only half the speed of the data.
It should go high and low for every bit.

xy2_100_signals.PNG
 
There seems to be another problem: the clock runs at only half the speed of the data.
It should go high and low for every bit.

Hmmm, if the FTM timer is ticking at 4 MHz, then generating one data pulse requires one tick for the HIGH level and one tick for the LOW level, so the data rate for the galvanometer data will be 2 MHz, i.e. 10 us for the 20 data pulses, as your figure illustrates.

Here is scope shot of LC pins 2, 14, 7 (CLOCK+, SYNC+, CHN1+) (yellow, purple, blue) running BasicTest example
galvo.png
The TPM clock is ticking at 4 MHz, so the data clock (pin 2) should be at 2 MHz. You can see the 20 clock pulses (yellow) for a frame, along with the low SYNC pulse (purple) on the last bit (parity) of the frame and the 16-bit blue X data pulses. The gap between the 20-bit frames is the DMA ISR overhead (2.8 us) and reduces the effective data rate to 1.47 MHz. Not shown: CHN2+ (the 16-bit Y data) on LC pin 8. See Paul's waveforms
 
Last edited:
Since I didn't really understand how the lib stores the signals in the DMA buffer, but I now know that the DMA works and how (thanks to your program),
I made my own version based on your program and some snippets from the lib.

Code:
//  PORTD  pins 2 14 7 8 6 20 21 5  TPM2
// order 5 21 20 6 8 7 14 2
// y- x- sync- clk- y+ x+ sync+ clk+

#include <DMAChannel.h>

// Prints a register as "NAME 0x<hex value>" on Serial.
// Wrapped in do { } while (0) so both statements stay together when the macro
// is used as the body of an unbraced if/else or loop.
#define PRREG(x) do { Serial.print(#x" 0x"); Serial.println(x,HEX); } while (0)

#define FREQ 40000

DMAChannel dma(false);

uint8_t bits_ping[40] = {0};
uint8_t bits_pong[40] = {0};
volatile bool buff_select = false; //if true pong buffer beeing read

// Dumps the registers relevant to this sketch to Serial, one per line, each
// printed by PRREG as its name followed by its value in hex.
void prregs() {
  PRREG(SIM_SCGC6);
  PRREG(DMA_DSR_BCR0);
  PRREG(DMA_DCR0);
  PRREG(TPM2_SC);
  PRREG(TPM2_C1SC);
  PRREG(TPM2_MOD);
}

// Restarts TPM2 from zero with a period of F_CPU / FREQ counter ticks and
// turns on the DMA request of channel 1 (the source dma_init() listens to).
// Assumes the TPM counter clock runs at F_CPU - TODO confirm.
void timer_init() {
  // tpm2 chnl 1
  TPM2_SC = 0;  // stop the counter while it is reconfigured
  TPM2_CNT = 0;
  TPM2_MOD = F_CPU / FREQ - 1;
  // NOTE(review): this writes channel 0's value register although the DMA
  // request below is enabled on channel 1 - confirm which channel is intended
  TPM2_C0V = 0;
  TPM2_SC =   FTM_SC_CLKS(1) | FTM_SC_PS(0) ;
  // read-modify-write: only the DMA bit is added to whatever C1SC already holds
  TPM2_C1SC |= FTM_CSC_DMA;
}

// Configures the DMA channel to copy a 40-byte frame buffer (initially
// bits_ping) to GPIOD_PDOR, one byte per TPM2 channel 1 request, with a
// completion interrupt handled by dmaisr(), and enables it.
void dma_init() {
  // DMA init
  dma.begin(true);  // presumably allocates the channel declared with DMAChannel dma(false) - confirm against DMAChannel.h
  dma.sourceBuffer(bits_ping, sizeof(bits_ping));
  dma.destination(GPIOD_PDOR);
  dma.transferSize(1);
  // the channel stops after each frame; dmaisr() reloads and re-enables it
  dma.disableOnCompletion();
  dma.triggerAtHardwareEvent(DMAMUX_SOURCE_TPM2_CH1);
  dma.interruptAtCompletion();
  dma.attachInterrupt(dmaisr);
  dma.enable();
}

// DMA-complete interrupt handler: acknowledges the interrupt, selects the
// frame to send next from buff_select (true: bits_pong, false: bits_ping),
// reloads the channel with it and re-enables the channel. The timer is left
// running.
void dmaisr() {
  dma.clearInterrupt();

  // both buffers have the same size, so either sizeof gives the frame length
  uint8_t *next_frame = buff_select ? bits_pong : bits_ping;
  dma.sourceBuffer(next_frame, sizeof(bits_ping));

  dma.enable();
}

// Starts the interrupt-driven output: sets up DMA, starts the timer and
// prints the registers once.
void interrupt()  {
  dma_init();
  timer_init();
  prregs();
}

// Signed front end for setXY(): shifts each coordinate by 32768 so that
// -32768 maps to 0 and 32767 maps to 65535, then forwards to setXY().
void setSignedXY(int16_t X, int16_t Y) {
  const int32_t offset = 32768L;
  const int32_t shifted_x = (int32_t)X + offset;
  const int32_t shifted_y = (int32_t)Y + offset;
  setXY((uint16_t)shifted_x, (uint16_t)shifted_y);
}

// Encodes an X/Y position into the frame buffer that is currently not being
// sent, then flips buff_select so dmaisr() picks the new frame up.
//
// Each channel is a 20-bit word: bit 17 set, the 16 data bits in bits 16..1,
// and bit 0 set when the number of ones in the rest of the word is odd.
// The word is sent MSB first; every bit occupies two buffer bytes (clock high,
// then clock low). Output byte layout (PORTD bit -> signal):
//   0 clk+   1 sync+   2 x+   3 y+   4 clk-   5 sync-   6 x-   7 y-
//
// NOTE(review): buff_select is flipped without checking whether the DMA has
// finished the frame in flight; a caller that updates faster than one frame
// time could write into a buffer that is still being sent - confirm.
void setXY(uint16_t X, uint16_t Y) {
  uint32_t frame_x = (((uint32_t)X << 1) | 0x20000ul) & 0x3fffeul;
  uint32_t frame_y = (((uint32_t)Y << 1) | 0x20000ul) & 0x3fffeul;

  // pong is being read -> fill ping, and vice versa
  uint8_t *target = buff_select ? bits_ping : bits_pong;

  // parity: count the ones of each 20-bit word
  uint8_t ones_x = 0;
  uint8_t ones_y = 0;
  for (int bit = 0; bit < 20; ++bit) {
    ones_x += (frame_x >> bit) & 1u;
    ones_y += (frame_y >> bit) & 1u;
  }
  frame_x |= (ones_x & 1u);
  frame_y |= (ones_y & 1u);

  // slot 0 carries the most significant bit (bit 19), slot 19 the parity bit
  for (int slot = 0; slot < 20; ++slot) {
    const int bit = 19 - slot;
    uint8_t word = 0x00;
    word |= ((frame_x >> bit) & 1u) ? (1 << 2) : (1 << 6);
    word |= ((frame_y >> bit) & 1u) ? (1 << 3) : (1 << 7);
    word |= (bit == 0) ? (1 << 5) : (1 << 1);  // sync low on the last bit, high otherwise

    target[2 * slot] = word | (1 << 0);      // clk high
    target[2 * slot + 1] = word | (1 << 4);  // clk low
  }

  // hand the freshly written buffer over to the ISR
  buff_select = !buff_select;
}

// Arduino entry point: opens the serial port (without waiting for a host),
// configures the LED pin and the eight PORTD pins as outputs, then starts the
// DMA/timer driven output.
void setup() {
  Serial.begin(9600);
  pinMode(13, OUTPUT);

  // PORTD pins, listed in bit order 0..7
  const uint8_t portd_pins[] = {2, 14, 7, 8, 6, 20, 21, 5};
  for (uint8_t pin : portd_pins) {
    pinMode(pin, OUTPUT);
  }

  interrupt();
}

// Test pattern: every 10 ms advances the position by 4 counts and sends the
// same value on both channels, wrapping around at 65535. Never returns.
void loop() {
  // unsigned so the wrap-around is well defined and matches setXY()'s
  // uint16_t parameters; the former unused `y` (initialized with a value that
  // does not fit int16_t) and the commented-out calls that used it are gone
  static uint16_t x = 0;
  const uint16_t pause = 10;  // ms between updates
  while (1) {
    x += 4;
    setXY(x, x);
    delay(pause);
  }
}

This code now produces the signals I am looking for.
The clock is obviously still half the value that is defined in the code, but I now have a falling clk edge in the center of every data bit.

At high speeds (real clock values of 0.5 MHz) the signal quality gets bad, probably because I'm using a breadboard and bad untwisted wires.
But at 4 MHz in the code (1.5 MHz produced) the galvo still acts the same as if the clock was lower.

So the problem with the signal generation is fixed.
But the galvo still doesn't like the signals.
It only goes into maximum deflection in the direction I want it to go.

I'm going to analyze the signals from an external control board to see if they are different, or maybe the problem is that the clock is not 2 MHz.
 
So a quick follow-up:
I had accidentally swapped the +/- wires of the sync signal on the way to the galvo; this caused the weird behavior.
My code works very well, it runs well at 1 MHz and it takes around 20 us to execute the setSignedXY() function.
For anyone interested, I have rewritten the code as a library similar to the one that didn't work.

And a final thanks for the help :)

(Edit changed the attachments to the correct files)
 

Attachments

  • RectangleTest.ino
    629 bytes · Views: 27
  • XY2_100.cpp
    3 KB · Views: 25
  • XY2_100.h
    585 bytes · Views: 27
Last edited:
the .cpp file you attached looks to be the original from the github site ??

I added a scope snapshot to post #5 showing LC pins 2,14,7
 
Back
Top