/**
 * Write a run of 16-bit values to the display over FlexIO, using DMA for the bulk.
 *
 * Two paths exist:
 *  - Short transfers (fewer values than one DMA granule, or fewer than 8) are
 *    sent by polling SHIFTBUF[0], high byte first, then low byte. This path is
 *    synchronous and raises CS before returning.
 *  - Longer transfers switch FlexIO to multi-beat mode and program an eDMA TCD
 *    by hand. The function returns right after enabling the channel, with
 *    WR_DMATransferDone set to false. Values that do not fill a whole granule
 *    are recorded in MulBeatCountRemain / MulBeatDataRemain.
 *    NOTE(review): presumably the completion handler (dmaISR / dmaCallback)
 *    sends that remainder, raises CS and sets WR_DMATransferDone back to true;
 *    none of that is visible in this function -- confirm.
 *
 * NOTE(review): the source buffer is not cache-flushed here (the flush below is
 * commented out). Presumably the caller guarantees the DMA engine sees current
 * data -- confirm.
 * NOTE(review): FlexIO_Config_SnglBeat() is commented out, so the polled path
 * relies on FlexIO already being in a suitable configuration on entry -- confirm.
 *
 * @param value   Pointer to the data, read as an array of uint16_t.
 * @param length  Number of 16-bit values to send.
 */
FASTRUN void RA8876_t3::MulBeatWR_nPrm_DMA(const void *value, uint32_t const length)
{
    while(WR_DMATransferDone == false) {} // Wait for any DMA transfers to complete

    // Number of shifters * number of 8 bit values per shifter. Used below as the
    // granule (in 16-bit values) that the DMA part of a transfer must be a multiple of.
    uint32_t BeatsPerMinLoop = SHIFTNUM * sizeof(uint32_t) / sizeof(uint8_t);
    uint32_t majorLoopCount, minorLoopBytes;
    // log2 of the shifter buffer span in bytes: defines the address range for the
    // circular DMA destination buffer (assumes SHIFTNUM is a power of two -- TODO confirm).
    uint32_t destinationModulo = 31-(__builtin_clz(SHIFTNUM*sizeof(uint32_t)));
    // FlexIO_Config_SnglBeat();
    CSLow();
    DCHigh();

    // Polled path. The "< 8" bound is kept because the multi-beat path pre-loads
    // 12 bytes from the buffer. The "< BeatsPerMinLoop" bound covers transfers that
    // are shorter than one DMA granule: those would give majorLoopCount == 0, and
    // the DMA channel must never be started with a zero CITER/BITER.
    if (length < 8 || length < BeatsPerMinLoop){
        //Serial.println ("In DMA but to Short to multibeat");
        const uint16_t * newValue = (uint16_t*)value;
        uint16_t buf;
        for(uint32_t i=0; i<length; i++)
        {
            buf = *newValue++;
            // Wait for shifter 0's status flag before each write.
            while(0 == (p->SHIFTSTAT & (1U << 0)))
            {
            }
            p->SHIFTBUF[0] = buf >> 8;   // high byte first
            while(0 == (p->SHIFTSTAT & (1U << 0)))
            {
            }
            p->SHIFTBUF[0] = buf & 0xFF; // then low byte
        }
        // Wait for transfer to be completed (timer 0 status flag).
        while(0 == (p->TIMSTAT & (1U << 0)))
        {
        }
        CSHigh();
    } else {
        //memcpy(framebuff, value, length);
        //arm_dcache_flush((void*)framebuff, sizeof(framebuff)); // always flush cache after writing to DMAMEM variable that will be accessed by DMA
        FlexIO_Config_MultiBeat();

        // Split the transfer: whole granules go through DMA, the rest is recorded
        // in the MulBeat* members.
        MulBeatCountRemain = length % BeatsPerMinLoop;
        // Points just past the DMA-transferred data (one past the end of the
        // buffer when MulBeatCountRemain == 0).
        MulBeatDataRemain = (uint16_t*)value + ((length - MulBeatCountRemain));
        TotalSize = (length - MulBeatCountRemain)*2; /* in bytes */
        minorLoopBytes = SHIFTNUM * sizeof(uint32_t);
        majorLoopCount = TotalSize/minorLoopBytes;
        //Serial.printf("Length(16bit): %d, Count remain(16bit): %d, Data remain: %d, TotalSize(8bit): %d, majorLoopCount: %d \n",length, MulBeatCountRemain, MulBeatDataRemain, TotalSize, majorLoopCount );

        /* Configure FlexIO with multi-beat write configuration */
        flexDma.begin();

        /* Setup DMA transfer with on-the-fly swapping of MSB and LSB in 16-bit data:
         * Within each minor loop, read 16-bit values from buf in reverse order, then write 32bit values to SHIFTBUFHWS[i] in reverse order.
         * Result is that every pair of bytes are swapped, while half-words are unswapped.
         * After each minor loop, advance source address using minor loop offset. */
        int destinationAddressOffset, destinationAddressLastOffset, sourceAddressOffset, sourceAddressLastOffset, minorLoopOffset;
        volatile void *destinationAddress, *sourceAddress;
        DMA_CR |= DMA_CR_EMLM; // enable minor loop mapping

        /* The most time-consuming lines of code to get a perfect image are here. I still don't fully understand why this is needed. But here is my clue:
         * The DMA setup further down uses the transfers in reverse mode. That's another thing that I don't understand, as this is the only way I get it working.
         * It seems that because of the reverse transfer logic of DMA the first (in forward thinking) bytes, which equal the first 12 clocks, are zero.
         * This leads to 6 black pixels for every new buffer transferred. Together with my Logic Analyzer and countless hours I found out that I can set the data
         * for the first 12 clocks if I set the first three SHIFTERS with the first 12 bytes of the image. This is done here.
         * Why 12 and not 16, and why SHIFTER 4 is not having this problem - I don't know.....
         * Somebody more clever than me can maybe explain this.
         */
        p->SHIFTBUFHWS[0] = *(uint32_t *)value;
        uint32_t *value32 = (uint32_t *)value;
        value32++;
        p->SHIFTBUFHWS[1] = *(uint32_t *)value32;
        value32++;
        p->SHIFTBUFHWS[2] = *(uint32_t *)value32;

        /*
         * This is the regular "forward", high-level way of doing DMA transfers, which should work in our use-case. But it doesn't.
         * For whatever reason, the buffer must be filled in reverse order. I don't know why. But it works. It could be that finer control
         * of the minor loop behavior is needed, which is not available through this DMA API; therefore this is set up manually below.
        flexDma.begin();
        flexDma.sourceBuffer((volatile uint16_t*)value, majorLoopCount*2);
        flexDma.destinationCircular((volatile uint32_t*)&p->SHIFTBUF[SHIFTNUM-1], SHIFTNUM);
        flexDma.transferCount(majorLoopCount);
        flexDma.transferSize(minorLoopBytes*2);
        flexDma.triggerAtHardwareEvent(hw->shifters_dma_channel[SHIFTER_DMA_REQUEST]);
        flexDma.disableOnCompletion();
        flexDma.interruptAtCompletion();
        flexDma.clearComplete();
        flexDma.attachInterrupt(dmaISR);
        flexDma.enable();
        dmaCallback = this;
        return;
        */

        /* From now on, the SHIFTERS in MultiBeat mode are working correctly. Begin DMA transfer */
        sourceAddress = (uint16_t*)value + minorLoopBytes/sizeof(uint16_t) - 1; // last 16bit address within current minor loop
        sourceAddressOffset = -sizeof(uint16_t); // read values in reverse order
        // Each minor loop walks the source back by minorLoopBytes, so adding twice
        // that lands on the last half-word of the next chunk.
        minorLoopOffset = 2*minorLoopBytes; // source address offset at end of minor loop to advance to next minor loop
        sourceAddressLastOffset = minorLoopOffset - TotalSize; // source address offset at completion to reset to beginning
        // Use SHIFTBUFHWS instead of SHIFTBUF or SHIFTBUFBYS.
        destinationAddress = (void *)&p->SHIFTBUFHWS[SHIFTNUM - 1]; // last 32bit shifter address (with reverse byte order)
        destinationAddressOffset = -sizeof(uint32_t); // write words in reverse order
        destinationAddressLastOffset = 0;

        // Program the transfer control descriptor by hand.
        flexDma.TCD->SADDR = sourceAddress;
        flexDma.TCD->SOFF = sourceAddressOffset;
        flexDma.TCD->SLAST = sourceAddressLastOffset;
        flexDma.TCD->DADDR = destinationAddress;
        flexDma.TCD->DOFF = destinationAddressOffset;
        flexDma.TCD->DLASTSGA = destinationAddressLastOffset;
        flexDma.TCD->ATTR =
            DMA_TCD_ATTR_SMOD(0U)
            | DMA_TCD_ATTR_SSIZE(DMA_TCD_ATTR_SIZE_16BIT) // 16bit reads
            | DMA_TCD_ATTR_DMOD(destinationModulo)         // wrap destination within the shifter buffers
            | DMA_TCD_ATTR_DSIZE(DMA_TCD_ATTR_SIZE_32BIT); // 32bit writes
        flexDma.TCD->NBYTES_MLOFFYES =
            DMA_TCD_NBYTES_SMLOE                            // apply the minor loop offset to the source only
            | DMA_TCD_NBYTES_MLOFFYES_MLOFF(minorLoopOffset)
            | DMA_TCD_NBYTES_MLOFFYES_NBYTES(minorLoopBytes);
        flexDma.TCD->CITER = majorLoopCount; // Current major iteration count
        flexDma.TCD->BITER = majorLoopCount; // Starting major iteration count
        flexDma.triggerAtHardwareEvent(hw->shifters_dma_channel[SHIFTER_DMA_REQUEST]);
        flexDma.disableOnCompletion();
        flexDma.interruptAtCompletion();
        flexDma.clearComplete();
        //Serial.println("Dma setup done");

        /* Start data transfer by using DMA */
        WR_DMATransferDone = false;
        flexDma.attachInterrupt(dmaISR);
        // Publish the instance pointer before the channel is enabled, so the
        // completion interrupt cannot run while dmaCallback is still unset or stale.
        dmaCallback = this;
        flexDma.enable();
        //Serial.println("Starting transfer");
    }
}