Sanity Check on Code to do I2S/DMA tdm Teensy 4.1

Neal

Well-known member
I am trying to do single DMA SAI1 TDM input/output from a codec chip to a teensy 4. Each transfer is 8 - 32bit words. I am using code heavily leveraged from the Audio Library I2S and tdm code, but I am not using Audio streaming. My problem is that sometimes receiving ADC data works flawlessly and then I will do a simple code change and there is a problem. When working correctly the codec ADC values are DMA'd into the receive buffer in correct order. When it fails the samples get shifted two positions in the receive buffer. It always fails the same way. The DAC output tdm does not fail.

So if you have experience with DMA/I2S communication, please see if you can spot something I am missing in the code below. I have read about similar problems in the FORUM that were related to cache flushing/deleting. But I think I am doing that correctly.

Any help would be appreciated. I only have a few hairs left to pull out!

Code:
DMAChannel CodecTDM::dma(false); //left codec to DAC using SAI1
DMAChannel CodecTDM::dma2(false); //left coded from ADC


static int32_t tdm_tx_buffer[16] DMAMEM __attribute__((aligned(32))); // codec pingpongbuffers
static int32_t tdm_rx_buffer[16] DMAMEM __attribute__((aligned(32)));

void CodecTDM::begin(void)
{
	dma.begin(true); // Allocate the DMA channel for Codec DAC
	dma2.begin(true); // Allocate the DMA channel for Codec ADC
	
	memset(tdm_tx_buffer, 0, sizeof(tdm_tx_buffer));
	
	config_tdm(); //configure left codec tdm

	//DMA DAC Configuration
	CORE_PIN7_CONFIG  = 3;  //1:TX_DATA0
	//Minor loop is gathering 4 bytes to transfer a 32 bit word
	//Major loop is 8 Ping + 8 Pong transfers = 16 Minor loops
	//Interrupts occur at half point in major loop (after 8 32bit words) and again at the end of the major loop (after another 8 32bit words)
	dma.TCD->SADDR = tdm_tx_buffer; 
	dma.TCD->SOFF = 4; 
	dma.TCD->ATTR = DMA_TCD_ATTR_SSIZE(2) | DMA_TCD_ATTR_DSIZE(2); 
	dma.TCD->NBYTES_MLNO = 4; 
	dma.TCD->SLAST = -sizeof(tdm_tx_buffer); 
	dma.TCD->DADDR = &I2S1_TDR0; 
	dma.TCD->DOFF = 0; 
	dma.TCD->CITER_ELINKNO = sizeof(tdm_tx_buffer) / 4; 
	dma.TCD->DLASTSGA = 0; 
	dma.TCD->BITER_ELINKNO = sizeof(tdm_tx_buffer) / 4; 
	dma.TCD->CSR = DMA_TCD_CSR_INTHALF | DMA_TCD_CSR_INTMAJOR; 
	dma.triggerAtHardwareEvent(DMAMUX_SOURCE_SAI1_TX);
	dma.enable();
	I2S1_RCSR |= I2S_RCSR_RE | I2S_RCSR_BCE;
	I2S1_TCSR = I2S_TCSR_TE | I2S_TCSR_BCE | I2S_TCSR_FRDE;
	dma.attachInterrupt(isr);

	//DMA ADC configuration
	CORE_PIN8_CONFIG  = 3;  //RX_DATA0
	IOMUXC_SAI1_RX_DATA0_SELECT_INPUT = 2;
	dma2.TCD->SADDR = &I2S1_RDR0;
	dma2.TCD->SOFF = 0;
	dma2.TCD->ATTR = DMA_TCD_ATTR_SSIZE(2) | DMA_TCD_ATTR_DSIZE(2);
	dma2.TCD->NBYTES_MLNO = 4;
	dma2.TCD->SLAST = 0;
	dma2.TCD->DADDR = tdm_rx_buffer;
	dma2.TCD->DOFF = 4;
	dma2.TCD->CITER_ELINKNO = sizeof(tdm_rx_buffer) / 4;
	dma2.TCD->DLASTSGA = -sizeof(tdm_rx_buffer);
	dma2.TCD->BITER_ELINKNO = sizeof(tdm_rx_buffer) / 4;
	dma2.TCD->CSR = DMA_TCD_CSR_INTHALF | DMA_TCD_CSR_INTMAJOR;
	dma2.triggerAtHardwareEvent(DMAMUX_SOURCE_SAI1_RX);
	dma2.enable();
	I2S1_RCSR = I2S_RCSR_RE | I2S_RCSR_BCE | I2S_RCSR_FRDE | I2S_RCSR_FR;
	dma2.attachInterrupt(isr2);


void CodecTDM::isr(void) // DAC isr
{
	int32_t *dest;
	uint32_t saddr;

	saddr = (uint32_t)(dma.TCD->SADDR);
	dma.clearInterrupt();
	if (saddr < (uint32_t)tdm_tx_buffer + sizeof(tdm_tx_buffer) / 2) {
		dest = tdm_tx_buffer + 8;
	} else {
		dest = tdm_tx_buffer;
	}

	#if IMXRT_CACHE_ENABLED >= 2
		int32_t *dc = dest;
	#endif

	// queue up next set of DAC outputs left codec
	*dest++ = Outputs[0]; 
	*dest++ = Outputs[1]; 
	*dest++ = Outputs[2]; 
	*dest++ = Outputs[3]; 
	*dest++ = Outputs[4]; 
	*dest++ = Outputs[5]; 
	*dest++ = Outputs[6]; 
	*dest++ = Outputs[7]; 

	#if IMXRT_CACHE_ENABLED >= 2
		arm_dcache_flush_delete(dc, sizeof(tdm_tx_buffer) / 2 );
	#endif
	__DSB();
}

void CodecTDM::isr2(void) // ADC isr
{
	uint32_t daddr;
	int32_t *src;

	daddr = (uint32_t)(dma2.TCD->DADDR);
	dma2.clearInterrupt();

	if (daddr < (uint32_t)tdm_rx_buffer + sizeof(tdm_rx_buffer) / 2) {
		src = tdm_rx_buffer + 8;
	} else {
		src = tdm_rx_buffer;
	}

	#if IMXRT_CACHE_ENABLED >=1
		arm_dcache_delete((void*)src, sizeof(tdm_rx_buffer) / 2);
	#endif

	// capture latest left codec inputs
	Inputs[0] = *src++; 
	Inputs[1] = *src++; 
	Inputs[2] = *src++; 
	Inputs[3] = *src++; 
	Inputs[4] = *src++; 
	Inputs[5] = *src;   

	__DSB();
}

void CodecTDM::config_tdm(void) 
{
	CCM_CCGR5 |= CCM_CCGR5_SAI1(CCM_CCGR_ON);

	// if either transmitter or receiver is enabled, do nothing
	if (I2S1_TCSR & I2S_TCSR_TE) return;
	if (I2S1_RCSR & I2S_RCSR_RE) return;

	//PLL:
	int fs = PLATFORM_SAMPLE_RATE;
	// PLL between 27*24 = 648MHz und 54*24=1296MHz
	int n1 = 4; //SAI prescaler 4 => (n1*n2) = multiple of 4
	int n2 = 1 + (24000000 * 27) / (fs * 256 * n1);

	double C = ((double)fs * 256 * n1 * n2) / 24000000;
	int c0 = C;
	int c2 = 10000;
	int c1 = C * c2 - (c0 * c2);
	set_audioClock(c0, c1, c2);
	// clear SAI1_CLK register locations
	CCM_CSCMR1 = (CCM_CSCMR1 & ~(CCM_CSCMR1_SAI1_CLK_SEL_MASK))
		   | CCM_CSCMR1_SAI1_CLK_SEL(2); // &0x03 // (0,1,2): PLL3PFD0, PLL5, PLL4

	n1 = n1 / 2; //Double Speed for TDM

	CCM_CS1CDR = (CCM_CS1CDR & ~(CCM_CS1CDR_SAI1_CLK_PRED_MASK | CCM_CS1CDR_SAI1_CLK_PODF_MASK))
		   | CCM_CS1CDR_SAI1_CLK_PRED(n1-1) // &0x07
		   | CCM_CS1CDR_SAI1_CLK_PODF(n2-1); // &0x3f

	IOMUXC_GPR_GPR1 = (IOMUXC_GPR_GPR1 & ~(IOMUXC_GPR_GPR1_SAI1_MCLK1_SEL_MASK))
			| (IOMUXC_GPR_GPR1_SAI1_MCLK_DIR | IOMUXC_GPR_GPR1_SAI1_MCLK1_SEL(0));	//Select MCLK

	// configure transmitter
	int rsync = 0;
	int tsync = 1;

	I2S1_TMR = 0;
	I2S1_TCR1 = I2S_TCR1_RFW(4);
	I2S1_TCR2 = I2S_TCR2_SYNC(tsync) | I2S_TCR2_BCP | I2S_TCR2_MSEL(1)
		| I2S_TCR2_BCD | I2S_TCR2_DIV(0);
	I2S1_TCR3 = I2S_TCR3_TCE;
	I2S1_TCR4 = I2S_TCR4_FRSZ(7) | I2S_TCR4_SYWD(0) | I2S_TCR4_MF
		| I2S_TCR4_FSE | I2S_TCR4_FSD;
	I2S1_TCR5 = I2S_TCR5_WNW(31) | I2S_TCR5_W0W(31) | I2S_TCR5_FBT(31);

	I2S1_RMR = 0;
	I2S1_RCR1 = I2S_RCR1_RFW(4);
	I2S1_RCR2 = I2S_RCR2_SYNC(rsync) | I2S_TCR2_BCP | I2S_RCR2_MSEL(1)
		| I2S_RCR2_BCD | I2S_RCR2_DIV(0);
	I2S1_RCR3 = I2S_RCR3_RCE;
	I2S1_RCR4 = I2S_RCR4_FRSZ(7) | I2S_RCR4_SYWD(0) | I2S_RCR4_MF
		| I2S_RCR4_FSE | I2S_RCR4_FSD;
	I2S1_RCR5 = I2S_RCR5_WNW(31) | I2S_RCR5_W0W(31) | I2S_RCR5_FBT(31);

	CORE_PIN23_CONFIG = 3;  //1:MCLK
	CORE_PIN21_CONFIG = 3;  //1:RX_BCLK
	CORE_PIN20_CONFIG = 3;  //1:RX_SYNC
}
 
I fixed this problem and thought I would document how. I found the following on page 1989 of the i.MT RT1060 Processor Manual. When I enabled things in the order outlined here, everything worked fine.

Code:
If both the transmitter and receiver use the transmitter bit clock and frame sync:
		• Configure the transmitter for asynchronous operation and the receiver for
			synchronous operation.
		• Enable the receiver in synchronous mode only after both the transmitter and receiver
			are enabled.
		• Enable the transmitter last.
 
Back
Top