manitou
Senior Member+
Well, I thought I'd try to get a DMA sketch working for teensy 3.0. I looked at the I2S DMA example and chapter 21 of the ARM manual -- but all those registers are overwhelming. I have developed similar memory to memory DMA sketches on Maple and DUE. Here is my non-working attempt on teensy
I have tried random changes to the various DMA regs to no avail ... I was hoping the dma_init would do a copy from src[] to dst[], then I was going to generalize to memcpy32()
Any hints would be appreciated.
thanks
Code:
/**********************************************************************
teensy only 16KB RAM
use DMA for memcpy memset ch 21 pg 387
I2S example uses DMA_MUX ??
*/
#define WORDS 1000
uint32_t src[WORDS],dst[WORDS];
//int32_t DMAMEM _dma_Buffer_A[WORDS];
//int32_t DMAMEM _dma_Buffer_B[WORDS];
volatile int DMAdone=0;
// CINT - DMA Clear Interrupt Request Register
#define DMA_CINT_CINT(n) ((uint8_t)(n & 3)<<0) // Clear Interrupt Request
#define DMA_CINT_CAIR ((uint8_t)1<<6) // Clear All Interrupt Requests
#define DMA_CINT_NOP ((uint8_t)1<<7) // NOP
#define DMA_CITER_MASK ((uint16_t)0x7FFF) // Loop count mask
#define DMA_CITER_ELINK ((uint16_t)1<<15) // Enable channel linking on minor-loop complete
#define DMA_BITER_MASK ((uint16_t)0x7FFF) // Loop count mask
#define DMA_BITER_ELINK ((uint16_t)1<<15) // Enable channel linking on min
// ERQ - DMA Enable Request Register
#define DMA_ERQ_ERQ0 ((uint32_t)1<<0) // Enable DMA Request 0
#define DMA_ERQ_ERQ1 ((uint32_t)1<<1) // Enable DMA Request 1
#define DMA_ERQ_ERQ2 ((uint32_t)1<<2) // Enable DMA Request 2
#define DMA_ERQ_ERQ3 ((uint32_t)1<<3) // Enable DMA Request 3
// SERQ - DMA Set Enable Request Register
#define DMA_SERQ_SERQ(n) ((uint8_t)(n & 3)<<0) // Set Enable Request
#define DMA_SERQ_SAER ((uint8_t)1<<6) // Set All Enable Requests
#define DMA_SERQ_NOP ((uint8_t)1<<7) // NOP
#define DMA_CR_EMLM ((uint32_t)0x80) // Enable Minor Loop Mapping
void dma_ch0_isr(void)
{
DMAdone=1;
DMA_CINT = DMA_CINT_CINT(0); // use the Clear Intr. Request register
}
void dma_init() {
// Enable IRQ on the DMA channel 0
// NVIC_ENABLE_IRQ(IRQ_DMA_ERROR);
NVIC_ENABLE_IRQ(IRQ_DMA_CH0);
// Set inactive
DMA_TCD0_CSR &= ~(DMA_TCD_CSR_ACTIVE);
// Control register
DMA_CR = 0 // Normal
// | DMA_CR_EDBG_MASK // Stall DMA transfers when debugger is halted (avoid noise)
| DMA_CR_EMLM; // Enable minor looping
// fill the TCD regs
DMA_TCD0_SADDR = (const volatile void *) src ; // alternated with _dma_Buffer_B by our interrupt handler
DMA_TCD0_SOFF = 4; // 4 byte offset
DMA_TCD0_ATTR = DMA_TCD_ATTR_SMOD(0) // No source modulo
| DMA_TCD_ATTR_SSIZE(DMA_TCD_ATTR_SIZE_32BIT)
| DMA_TCD_ATTR_DMOD(0) // No destination modulo
| DMA_TCD_ATTR_DSIZE(DMA_TCD_ATTR_SIZE_32BIT);
DMA_TCD0_NBYTES_MLNO = 4; // Transfer 4 bytes in each service request
DMA_TCD0_SLAST = 0; // source address will always be newly written before each new start
DMA_TCD0_DADDR = (volatile void *) dst; // Destination
DMA_TCD0_DOFF = 4; // destination offset after each write
DMA_TCD0_DLASTSGA = 0; // No scatter/gather
DMA_TCD0_CITER_ELINKNO = WORDS & DMA_CITER_MASK; // major loop iteration count
DMA_TCD0_BITER_ELINKNO = WORDS & DMA_BITER_MASK; // major loop iteration count
DMA_TCD0_CSR = DMA_TCD_CSR_INTMAJOR; // interrupt on major loop completion
// no stalls | DMA_TCD_CSR_BWC(3); // DMA bandwidth control
// enable DMA channel 0 requests
DMA_ERQ = DMA_ERQ_ERQ0;
DMA_SERQ = DMA_SERQ_SERQ(0);
// Set active
// DMA_TCD0_CSR |= DMA_TCD_CSR_ACTIVE;
// To initiate from software, set DMA_CSR[start]
DMA_TCD0_CSR |= DMA_TCD_CSR_START;
}
void setup(){
int i;
Serial.begin(9600);
while(!Serial);
for (i=0;i<WORDS;i++){
dst[i]=0;
src[i]=i;
}
dma_init();
delay(1000);
if (DMAdone) Serial.println("done");
Serial.println(dst[4]);
Serial.println( DMA_TCD0_CSR,HEX);
}
void loop(){
int i,t1,t2;
for (i=0;i<WORDS;i++){
dst[i]=0;
src[i]=i;
}
#if 0
memcpy32(dst,src,WORDS);
Serial.println(dst[3],DEC);
memset32(dst,45,WORDS);
Serial.println(dst[3],DEC);
t1=micros();
memcpy32(dst,src,WORDS);
t2 = micros() - t1;
Serial.print("memcpy32 ");Serial.println(t2,DEC);
t1=micros();
memset32(dst,66,WORDS);
t2 = micros() - t1;
Serial.print("memset32 ");Serial.println(t2,DEC);
#endif
t1=micros();
for(i=0;i<WORDS;i++) dst[i] = src[i];
t2 = micros() - t1;
Serial.print("loop ");Serial.println(t2,DEC);
dst[3]=99;
t1=micros();
memcpy(dst,src,4*WORDS);
t2 = micros() - t1;
Serial.print("memcpy ");Serial.println(t2,DEC);
Serial.println(dst[3],DEC);
t1=micros();
memset(dst,66,4*WORDS);
t2 = micros() - t1;
Serial.print("memset ");Serial.println(t2,DEC);
Serial.println(dst[3],HEX);
t1=micros();
for(i=0;i<WORDS;i++) dst[i] = 66;
t2 = micros() - t1;
Serial.print("set loop ");Serial.println(t2,DEC);
Serial.println();
delay(3000);
}
I have tried random changes to the various DMA regs to no avail ... I was hoping the dma_init would do a copy from src[] to dst[], then I was going to generalize to memcpy32()
Any hints would be appreciated.
thanks