Code:
#include <SPI.h>
#include <DMAChannel.h>
// DMA channel used to feed words to the SPI transmit register.
// Constructed with `false`; setup() calls spi_dma.begin(true) to allocate it.
DMAChannel spi_dma(false);
// Capacity, in 32-bit words, of each DMA queue buffer.
#define SPI_DMA_MAX 4096
// Double buffer: one half is filled by spi_dma_tx_append() while the other
// half may still be the source of a running DMA transfer.
static uint32_t spi_dma_q[2][SPI_DMA_MAX] __attribute__((aligned(32))); // Double buffer
// Index (0 or 1) of the buffer currently being filled; spi_dma_tx() hands
// that buffer to the DMA channel and then flips this index.
static unsigned spi_dma_which; // Which buffer is being filled
// Number of words queued so far in spi_dma_q[spi_dma_which].
static unsigned spi_dma_count; // How full the buffer is
// Non-zero from spi_dma_tx() until spi_dma_tx_complete() observes completion.
volatile int spi_dma_in_progress; // DMA in progress flag
// Not sure how these are allocated, or if they're correct, but they function on Teensy 3.2
// NOTE(review): in the code visible here only SPI_DMA_CS_IC5 is used (once,
// in brightness()); every other call passes an SS pin number instead.
#define SPI_DMA_CS_IC3 0
#define SPI_DMA_CS_IC5 1
#define SPI_DMA_CS_IC4 2
// Teensy 4.1 SS pins connected to DACs
const int SS0_IC5 = 8;
const int SS1_IC4 = 6;
const int SS1_IC3 = 22;
// SPI data and clock pins; setup() configures both as outputs.
#define SDI 11
#define SCK 13
// DELAY_PIN is driven high by spi_dma_tx() and low by spi_dma_tx_complete(),
// so it is high while a DMA transfer is outstanding.
#define DELAY_PIN 7
// IO_PIN is configured as an output in setup(); no other use is visible here.
#define IO_PIN 5
// settings
// OFF_SHIFT / NORMAL_SHIFT: step-size shift passed to _draw_lineto() for
// blanked moves (draw_moveto) and for lit lines (loop) respectively.
// OFF_DWELL0..2: dwell() counts used in brightness() and draw_moveto().
static int OFF_SHIFT = 5;
static int OFF_DWELL0 = 0;
static int OFF_DWELL1 = 2;
static int OFF_DWELL2 = 2;
static int NORMAL_SHIFT = 2;
// Last beam position requested through goto_x() / goto_y().
static uint16_t x_pos;
static uint16_t y_pos;
// MCP4922 channel selectors as understood by MCP4922_write() (1 selects B).
#define DAC_CHAN_A 0
#define DAC_CHAN_B 1
// Which DAC channel carries X and which carries Y.
static int DAC_X_CHAN = 1;
static int DAC_Y_CHAN = 0;
// Capacity of the Chunk[] point list.
#define MAX_PTS 3000
// rx_points: points appended so far via rx_append(); loop() only starts a
// redraw while this is 0.
static unsigned rx_points;
// num_points: number of Chunk[] entries that loop() draws each frame.
static unsigned num_points;
// Intensity of each colour channel as an 8-bit value.
typedef struct ColourIntensity {
uint8_t red;
uint8_t green;
uint8_t blue;
} ColourIntensity_t;
// Colour most recently requested through brightness(); brightness() compares
// against it so that unchanged channels are not rewritten to the DAC.
static ColourIntensity_t LastColInt;
// Chunk of data to process using DMA or SPI
// One display point: a target position plus the colour used to draw the line
// that ends there. rx_append() masks x and y to 12 bits. loop() treats a point
// whose three colour components sum to zero as a blanked move.
typedef struct DataChunk {
uint16_t x;
uint16_t y;
uint8_t red;
uint8_t green;
uint8_t blue;
} DataChunk_t;
// Point list for the current frame; filled by rx_append(), drawn by loop().
static DataChunk_t Chunk[MAX_PTS];
// Minimum interval between frame redraws. loop() compares it against a
// difference of micros() readings, so despite the name this is a period
// in microseconds rather than a frequency.
#define REFRESH_RATE 20000u
// One-time initialisation: configures the DAC select pins and debug pins,
// loads the test pattern into Chunk[], starts the SPI peripheral, configures
// the DMA channel that feeds LPSPI4_TDR, and kicks off a first short transfer.
void setup()
{
Serial.begin(9600);
// Set SS pins to 3 DACs to output
// Each select line is driven HIGH (idle); spi_dma_tx() pulls them LOW for the
// duration of a transfer.
pinMode(SS0_IC5, OUTPUT);
digitalWriteFast(SS0_IC5, HIGH);
delayNanoseconds(100);
pinMode(SS1_IC4, OUTPUT);
digitalWriteFast(SS1_IC4, HIGH);
delayNanoseconds(100);
pinMode(SS1_IC3, OUTPUT);
digitalWriteFast(SS1_IC3, HIGH);
delayNanoseconds(100);
// DELAY_PIN starts low; it is raised while a DMA transfer is outstanding.
pinMode(DELAY_PIN, OUTPUT);
pinMode(IO_PIN, OUTPUT);
digitalWriteFast(DELAY_PIN, 0);
delayNanoseconds(100);
pinMode(SDI, OUTPUT);
pinMode(SCK, OUTPUT);
delay(1); // https://www.pjrc.com/better-spi-bus-design-in-3-steps/
// Queue the built-in test square into Chunk[]; this only touches the point
// list, so it is safe to run before the SPI peripheral is started.
draw_test_pattern();
SPI.begin();
// 20 MHz, MSB first, SPI mode 0. No matching endTransaction() is visible in
// this file section, so the transaction is held open.
SPI.beginTransaction(SPISettings(20000000, MSBFIRST, SPI_MODE0));
/* uint32_t fastio = IOMUXC_PAD_SRE | IOMUXC_PAD_DSE(3) | IOMUXC_PAD_SPEED(3); Do the 3 pins need to be configured for DMA?
// from old Teensy 3.2 code : configure the output on pins 6, 8, 22 for !SS0 from the SPI hardware
// and pin 6 for !SS1.
CORE_PIN6_CONFIG = fastio;
CORE_PIN8_CONFIG = fastio;
CORE_PIN22_CONFIG = fastio;
*/
// Set up a DMA channel to send the SPI data
spi_dma.begin(true); // Allocate the DMA channel first
// NOTE(review): the destination is cast to an 8-bit register reference, yet
// transferSize(4) below asks for 32-bit writes. Confirm against the
// DMAChannel library that the later call overrides the width, or cast to
// volatile uint32_t & so both agree.
spi_dma.destination((volatile uint8_t &) LPSPI4_TDR); // SHOULD BE UINT32_T???
spi_dma.disableOnCompletion();
spi_dma.triggerAtHardwareEvent( DMAMUX_SOURCE_LPSPI4_TX ); // start
spi_dma.transferSize(4); // write all 32-bits
// send something to get it started
// Two zero-valued words are queued with chip-select arguments 1 and 2
// (numerically equal to SPI_DMA_CS_IC5 and SPI_DMA_CS_IC4) and transmitted.
spi_dma_which = 0;
spi_dma_count = 0;
spi_dma_tx_append(0, 1);
spi_dma_tx_append(0, 2);
spi_dma_tx();
}
// Draw one frame.
//
// Phase 1: wait until the DMA engine is idle, no points are pending in
//          rx_points, and more than REFRESH_RATE microseconds have passed
//          since the previous frame started. While waiting, any partially
//          filled queue buffer is flushed as soon as the engine is idle.
// Phase 2: drain every outstanding and buffered transfer.
// Phase 3: walk Chunk[0 .. num_points) - a point with no colour is a blanked
//          move, anything else is a lit line in that colour.
// Phase 4: blank the beam and park it at (2048, 2048).
void loop()
{
  static uint32_t frame_micros; // micros() reading taken when the last frame began
  uint32_t now;

  for (;;)
  {
    now = micros();

    if (!spi_dma_tx_complete())
      continue; // transfer still running, keep polling

    if (rx_points == 0 && now - frame_micros > REFRESH_RATE)
      break; // time for the next frame

    // make sure we flush the partial buffer once the last one has completed
    spi_dma_tx();
  }
  frame_micros = now;

  // Let any transfer in flight finish, start whatever is still buffered,
  // and wait for that to finish as well.
  while (!spi_dma_tx_complete())
  {
  }
  spi_dma_tx();
  while (!spi_dma_tx_complete())
  {
  }

  unsigned idx = 0;
  while (idx < num_points)
  {
    const DataChunk_t pt = Chunk[idx++];
    const int is_lit = (pt.red + pt.green + pt.blue) != 0;

    if (is_lit)
    {
      brightness(pt.red, pt.green, pt.blue);
      _draw_lineto(pt.x, pt.y, NORMAL_SHIFT);
    }
    else
    {
      draw_moveto(pt.x, pt.y);
    }
  }

  // End of frame: beam off, parked at (2048, 2048).
  brightness(0, 0, 0);
  goto_x(2048);
  goto_y(2048);
}
static void draw_test_pattern()
{
rx_points = 0;
rx_append(0, 0, 0, 0, 0);
rx_append(1024, 0, 128, 128, 128);
rx_append(1024, 1024, 128, 128, 128);
rx_append(0, 1024, 128, 128, 128);
rx_append(0, 0, 128, 128, 128);
}
void rx_append(int x, int y, uint8_t red, uint8_t green, uint8_t blue)
{
rx_points ++;
Chunk[rx_points].x = x & 0xFFF;
Chunk[rx_points].y = y & 0xFFF;
Chunk[rx_points].red = red;
Chunk[rx_points].green = green;
Chunk[rx_points].blue = blue;
}
// Move the beam to (x1, y1) without drawing.
// The beam is blanked first, held for OFF_DWELL1 dwell steps, moved using the
// OFF_SHIFT step size, and then held for OFF_DWELL2 dwell steps on arrival.
void draw_moveto(int x1, int y1)
{
  // Beam off, then hold before leaving the current position.
  brightness(0, 0, 0);
  dwell(OFF_DWELL1);

  // Travel to the target, then hold there.
  _draw_lineto(x1, y1, OFF_SHIFT);
  dwell(OFF_DWELL2);
}
// Set the beam colour.
// After OFF_DWELL0 dwell steps, each component that differs from the value
// remembered in LastColInt is scaled from 8 to 12 bits (<< 4) and queued as
// a DAC write; unchanged components produce no traffic.
//
// NOTE(review): red addresses the DAC with SPI_DMA_CS_IC5 while green and
// blue use the pin number SS0_IC5 - one of the two schemes is presumably
// wrong; confirm which one the queue word format expects.
// NOTE(review): red and blue are both written to channel B of the same DAC,
// so the later write would replace the earlier one - check the schematic for
// the intended chip/channel of the third colour.
static inline void brightness(uint8_t red, uint8_t green, uint8_t blue)
{
  dwell(OFF_DWELL0);

  if (red != LastColInt.red)
  {
    MCP4922_write(SPI_DMA_CS_IC5, DAC_CHAN_B, red << 4);
    LastColInt.red = red;
  }

  if (green != LastColInt.green)
  {
    MCP4922_write(SS0_IC5, DAC_CHAN_A, green << 4);
    LastColInt.green = green;
  }

  if (blue != LastColInt.blue)
  {
    MCP4922_write(SS0_IC5, DAC_CHAN_B, blue << 4);
    LastColInt.blue = blue;
  }
}
// Record x as the current horizontal position and queue the matching DAC
// write. The value sent is 4095 - x, i.e. the axis is inverted on output.
static inline void goto_x(uint16_t x)
{
  const uint16_t dac_code = (uint16_t)(4095 - x);

  x_pos = x;
  MCP4922_write(SS1_IC4, DAC_X_CHAN, dac_code);
}
// Record y as the current vertical position and queue the matching DAC
// write. The value sent is 4095 - y, i.e. the axis is inverted on output.
static inline void goto_y(uint16_t y)
{
  const uint16_t dac_code = (uint16_t)(4095 - y);

  y_pos = y;
  MCP4922_write(SS1_IC4, DAC_Y_CHAN, dac_code);
}
// Hold the beam where it is for `count` DAC writes.
// Each step re-sends the current position, alternating Y (even steps) and
// X (odd steps). A count of zero or less does nothing.
static void dwell(const int count)
{
  int step = 0;

  while (step < count)
  {
    if ((step & 1) == 0)
      goto_y(y_pos);
    else
      goto_x(x_pos);
    step++;
  }
}
// Step the beam from the current position (x_pos, y_pos) to (x1, y1).
//
// Both end points are reduced to a grid that is (1 << bright_shift) units
// wide and the path between them is walked with an integer Bresenham line
// algorithm, issuing one goto_x()/goto_y() per grid step. The low bits of
// the destination that the shift discards are added back to every
// intermediate coordinate, and a final pair of writes lands the beam on the
// exact destination.
//
//   x1, y1        destination
//   bright_shift  log2 of the step size; larger values give fewer steps
static inline void _draw_lineto(int x1, int y1, const int bright_shift)
{
  const int low_mask = (1 << bright_shift) - 1;

  // Sub-step remainder of the destination, re-applied to every step.
  const int x_frac = x1 & low_mask;
  const int y_frac = y1 & low_mask;

  // Destination and start in coarse grid units.
  const int x_end = x1 >> bright_shift;
  const int y_end = y1 >> bright_shift;
  int x_cur = x_pos >> bright_shift;
  int y_cur = y_pos >> bright_shift;

  // Re-assert the starting position before moving.
  goto_x(x_pos);
  goto_y(y_pos);

  // Absolute distance and step direction on each axis.
  const int step_x = (x_cur <= x_end) ? 1 : -1;
  const int dist_x = (x_cur <= x_end) ? (x_end - x_cur) : (x_cur - x_end);
  const int step_y = (y_cur <= y_end) ? 1 : -1;
  const int dist_y = (y_cur <= y_end) ? (y_end - y_cur) : (y_cur - y_end);

  int err = dist_x - dist_y;

  while (x_cur != x_end || y_cur != y_end)
  {
    const int twice_err = 2 * err;

    if (twice_err > -dist_y)
    {
      err -= dist_y;
      x_cur += step_x;
      goto_x(x_frac + (x_cur << bright_shift));
    }

    if (twice_err < dist_x)
    {
      err += dist_x;
      y_cur += step_y;
      goto_y(y_frac + (y_cur << bright_shift));
    }
  }

  // ensure that we end up exactly where we want
  goto_x(x1);
  goto_y(y1);
}
// Queue one 12-bit sample for an MCP4922 DAC.
//
//   cs_pin  chip identifier forwarded unchanged to spi_dma_tx_append()
//   dac     output channel: 1 selects channel B, anything else channel A
//   value   sample; only the low 12 bits are used
//
// The command word is the sample plus 0x7000 (buffered, no gain) and 0x8000
// when channel B is selected. Use 0x3000 in place of 0x7000 for unbuffered
// output. When the append fills the queue buffer, this waits for the previous
// DMA transfer to finish and then starts transmission of the full buffer.
void MCP4922_write(int cs_pin, byte dac, uint16_t value)
{
  const int config_bits = 0x7000; // buffered, no gain
  const int channel_bit = (dac == 1) ? 0x8000 : 0x0000;
  const uint16_t command = (uint16_t)((value & 0x0FFF) | config_bits | channel_bit);

  if (spi_dma_tx_append(command, cs_pin) != 0)
  {
    // Buffer is full: wait for the previous line to finish...
    while (!spi_dma_tx_complete())
    {
    }
    // ...then send this one, which also swaps buffers.
    spi_dma_tx();
  }
}
// Append one word to the queue buffer currently being filled.
//
//   value       16-bit payload, stored in bits 0-15 of the queue word
//   spi_dma_cs  chip-select value, stored starting at bit 16
//
// Returns 1 when this append has filled the buffer (the caller must transmit
// before appending again) and 0 otherwise.
static int spi_dma_tx_append(uint16_t value, int spi_dma_cs)
{
  uint32_t *slot = &spi_dma_q[spi_dma_which][spi_dma_count];

  *slot = 0 | ((uint32_t)value) | (spi_dma_cs << 16); // enable the chip select line
  spi_dma_count++;

  return (spi_dma_count == SPI_DMA_MAX) ? 1 : 0;
}
// Start a DMA transfer of the queue buffer that has been filled so far, then
// switch filling over to the other buffer. Does nothing when no words are
// queued. This function does not itself check whether an earlier transfer is
// still running; MCP4922_write() and loop() poll spi_dma_tx_complete() before
// calling it.
static void spi_dma_tx()
{
if (spi_dma_count == 0)
return;
// Marker pin goes high for the duration of the transfer.
digitalWriteFast(DELAY_PIN, 1);
// add a EOQ to the last entry
// NOTE(review): bit 27 is set as an end-of-queue flag, a convention carried
// over from the Teensy 3.2 code this was ported from; confirm it has any
// meaning for words written to LPSPI4_TDR.
spi_dma_q[spi_dma_which][spi_dma_count-1] |= (1<<27);
spi_dma.clearComplete();
spi_dma.clearError();
spi_dma.sourceBuffer(spi_dma_q[spi_dma_which], 4 * spi_dma_count); // in bytes, not thingies
// The buffer just handed to the DMA channel must not be written again until
// the transfer completes, so further appends go to the other buffer.
spi_dma_which = !spi_dma_which;
spi_dma_count = 0;
// Reconfigure the SPI peripheral with the module disabled, then re-enable it.
LPSPI4_CR &= ~LPSPI_CR_MEN; // Disable LPSPI:
LPSPI4_CFGR1 |= LPSPI_CFGR1_NOSTALL; // Prevent stall from RX
//LPSPI4_TCR = 15; // Framesize 16 Bits - seems to stop things working on Teensy 4.1
LPSPI4_FCR = 0; // Fifo Watermark
LPSPI4_DER = LPSPI_DER_TDDE; // TX DMA Request Enable
LPSPI4_CR |= LPSPI_CR_MEN; // Enable LPSPI
// All three DAC select lines are pulled low together and stay low until
// spi_dma_tx_complete() raises them.
// NOTE(review): with every select low at once, each DAC would presumably
// see every word - confirm this is intended.
digitalWrite(SS0_IC5, LOW);
digitalWrite(SS1_IC4, LOW);
digitalWrite(SS1_IC3, LOW);
spi_dma.enable();
spi_dma_in_progress = 1;
}
// Poll the DMA transfer started by spi_dma_tx().
//
// Returns 1 when no transfer is outstanding, either because none was started
// or because the current one has just been seen to finish. Returns 0 while a
// transfer is still running.
//
// On the call that first observes completion the transfer is torn down: the
// marker pin is lowered, all three DAC select lines are raised, the DMA
// status flags are cleared and, after a 5 microsecond pause, the SPI module
// is disabled.
static int spi_dma_tx_complete()
{
  if (spi_dma_in_progress)
  {
    if (!spi_dma.complete())
      return 0; // still transmitting

    digitalWriteFast(DELAY_PIN, 0);

    // Deselect every DAC.
    digitalWrite(SS0_IC5, HIGH);
    digitalWrite(SS1_IC4, HIGH);
    digitalWrite(SS1_IC3, HIGH);

    spi_dma.clearComplete();
    spi_dma.clearError();

    delayMicroseconds(5);
    LPSPI4_CR &= ~LPSPI_CR_MEN; // disable LPSPI:

    spi_dma_in_progress = 0;
  }

  // Nothing in progress counts as complete.
  return 1;
}