I'm trying to build a multi-protocol bridge device that will allow users to interface with CAN/SPI/I2C devices through the various UART ports on the Teensy 3.6. It seems like a good use case for DMA, since the main purpose is to transfer data between peripherals. I'd also like to add a timestamp to each data transfer/frame by reading the elapsed microseconds via the micros() function.

I'm working with Teensy 3.6, Arduino 1.8.1 and TeensyDuino 1.36 on Windows 10.

Summary:
Using a 21-byte buffer (17 byte frame + 4 byte timestamp)
The first DMA channel transfers the 17-byte input frame from Serial1 into the main buffer. Its completion callback reads the timestamp via micros() into the timestamp buffer and then enables the second DMA channel.
The second DMA channel transfers the timestamp buffer into the main buffer. It stays idle until the first channel's callback enables it.
The third DMA channel is triggered by each transfer, and by the completion, of the first two channels. It transfers from the main buffer to the UART output.

Problem:
I'm getting different behaviour, depending on which UART port I use as a transfer destination. When I output to Serial2, everything works as expected. But when I output to Serial3, the third DMA channel is only transferring the first byte of the 4-byte timestamp. I'm guessing it has something to do with Serial2 having a hardware FIFO and Serial3 not, but that's just a hunch.

Input data on Serial1:
Click image for larger version. 

Name:	UART0_Out.png 
Views:	97 
Size:	21.0 KB 
ID:	10336

Output data on Serial2 (working as expected, with 17-byte frame plus 4-byte timestamp):
Click image for larger version. 

Name:	UART1_Out.png 
Views:	94 
Size:	22.3 KB 
ID:	10338

Output data on Serial3 (not working. Shows 17-byte frame but only the first byte of the timestamp):
Click image for larger version. 

Name:	UART2_Out.png 
Views:	75 
Size:	21.0 KB 
ID:	10337

Code:
#include "DMAChannel.h"


//Comment this out to use UART2 (Serial3) as the output instead of UART1 (Serial2)
#define USE_UART1


//Number of bytes in one incoming frame read from Serial1
#define FRAME_SIZE 17
//Number of bytes in the micros() timestamp appended after each frame
#define TIMESTAMP_SIZE 4


//Total number of bytes in the outgoing buffer: the frame followed by its timestamp.
//The expansion is parenthesized so the sum stays intact when the macro is used
//inside a larger expression (e.g. BUFFER_SIZE * 2).
#define BUFFER_SIZE (FRAME_SIZE + TIMESTAMP_SIZE)


//DMA channel that moves incoming Serial1 bytes into _uartBuff (configured in setup())
static DMAChannel _readerChan;
//DMA channel that moves _uartBuff contents to the output UART data register (configured in setup())
static DMAChannel _writerChan;
//DMA channel that copies _timestampBuff into _uartBuff just after the frame bytes (configured in setup())
static DMAChannel _timestampChan;
//Main buffer: FRAME_SIZE frame bytes followed by TIMESTAMP_SIZE timestamp bytes
static volatile uint8_t _uartBuff[BUFFER_SIZE];
//Staging buffer for the timestamp bytes; filled by frameCallback()
static volatile uint8_t _timestampBuff[TIMESTAMP_SIZE];


//Interrupt handler attached to _readerChan.
//Clears the channel's interrupt, stores the current micros() value in
//_timestampBuff (least-significant byte at index 0), then enables
//_timestampChan so the timestamp gets copied into the main buffer.
static void frameCallback()
{
    _readerChan.clearInterrupt();

    //Peel off one byte per iteration, lowest byte first
    uint32_t remaining = micros();
    for (unsigned idx = 0; idx < 4; ++idx)
    {
        _timestampBuff[idx] = remaining & 0xFF;
        remaining >>= 8;
    }

    _timestampChan.enable();
}


//Debug interrupt handler attached to _timestampChan:
//clears the channel's interrupt and logs a marker line on the USB serial port.
static void timestampCallback()
{
    const char *const marker = "timestamp callback";

    _timestampChan.clearInterrupt();
    Serial.println(marker);
}


//Debug interrupt handler attached to _writerChan:
//clears the channel's interrupt and logs a marker line on the USB serial port.
static void uartOutCallback()
{
    const char *const marker = "uart out callback";

    _writerChan.clearInterrupt();
    Serial.println(marker);
}


//One-time configuration.
//Starts Serial1 as the DMA source and either Serial2 or Serial3 (selected by
//USE_UART1) as the DMA destination, then configures the three DMA channels:
//  _readerChan    : UART0_D -> first FRAME_SIZE bytes of _uartBuff
//  _timestampChan : _timestampBuff -> _uartBuff + FRAME_SIZE
//  _writerChan    : _uartBuff -> UART1_D or UART2_D
void setup()
{
    //All three UARTs used by this sketch run at the same baud rate
    uint32_t baud = 1000000;


    //UART0 is Serial1
    Serial1.begin(baud);
    Serial.println("DMA source: Serial1");


    //settings copied from UartEvent library
    UART0_C5 = UART_C5_TDMAS | UART_C5_RDMAS; // setup Serial1 tx,rx to use dma
    UART0_TWFIFO = 2; // tx watermark, causes C5_TDMAS DMA request
    UART0_RWFIFO = 1; // rx watermark, causes C5_RDMAS DMA request


#ifdef USE_UART1
    //UART1 is Serial2
    Serial2.begin(baud);


    //this doesn't seem to be needed for rx dma
    //UART1_C5 = UART_C5_TDMAS | UART_C5_RDMAS; // setup Serial2 tx,rx to use dma
    //UART1_TWFIFO = 2; // tx watermark, causes C5_TDMAS DMA request
    //UART1_RWFIFO = 1; // rx watermark, causes C5_RDMAS DMA request
#else
    //UART2 is Serial3
    Serial3.begin(baud);
    
    //this doesn't seem to be needed for rx dma
    //UART2_C5 = UART_C5_TDMAS | UART_C5_RDMAS; // setup Serial3 tx,rx to use dma
    //UART2_TWFIFO = 2; // tx watermark, causes C5_TDMAS DMA request
    //UART2_RWFIFO = 1; // rx watermark, causes C5_RDMAS DMA request
#endif


    //this channel reads incoming bytes on Serial1
    _readerChan.source(UART0_D);
    _readerChan.destinationBuffer(_uartBuff, FRAME_SIZE);
    _readerChan.triggerAtHardwareEvent(DMAMUX_SOURCE_UART0_RX);
    _readerChan.interruptAtCompletion();
    _readerChan.attachInterrupt(frameCallback); //timestamp is captured and _timestampChan gets enabled in frameCallback
    _readerChan.enable();


    //this channel copies the timestamp into the main buffer
    //it is not enabled here; frameCallback() calls enable() on it after each frame
    _timestampChan.sourceBuffer(_timestampBuff, TIMESTAMP_SIZE);
    _timestampChan.destinationBuffer(_uartBuff + FRAME_SIZE, TIMESTAMP_SIZE);
    //_timestampChan.interruptAtCompletion(); //callback is just for debugging
    //NOTE(review): with interruptAtCompletion() commented out, timestampCallback is attached but presumably never invoked - confirm against DMAChannel
    _timestampChan.attachInterrupt(timestampCallback);
    _timestampChan.triggerContinuously();
    _timestampChan.disableOnCompletion();


    //this channel writes to UART whenever a data or timestamp byte is received into _uartBuff
    //NOTE(review): the writer is triggered only by the other two DMA channels, not by a UART tx DMA request,
    //so nothing here paces it against the output UART's transmitter. _timestampChan is set to trigger
    //continuously, so its bytes presumably arrive back-to-back - TODO confirm whether the output UART's
    //tx FIFO depth explains why Serial2 works and Serial3 drops timestamp bytes
    _writerChan.sourceBuffer(_uartBuff, BUFFER_SIZE);
#ifdef USE_UART1
    Serial.println("DMA destination: Serial2");
    _writerChan.destination(UART1_D); //works properly when destination is UART1 (Serial2)
#else
    Serial.println("DMA destination: Serial3");
    _writerChan.destination(UART2_D); //only transfers the first byte of the timestamp when destination is UART2 (Serial3)
#endif
    _writerChan.triggerAtTransfersOf(_readerChan);
    _writerChan.triggerAtCompletionOf(_readerChan);
    _writerChan.triggerAtTransfersOf(_timestampChan);
    _writerChan.triggerAtCompletionOf(_timestampChan);
    //_writerChan.interruptAtCompletion(); //callback is just for debugging
    //NOTE(review): as with _timestampChan, uartOutCallback is attached but presumably never invoked while the line above stays commented out
    _writerChan.attachInterrupt(uartOutCallback);
    _writerChan.enable();


    Serial.println("Enabled DMA.");
}


//Prints `count` bytes starting at `bytes` on the USB serial port as
//space-separated base-16 values, then ends the line.
static void printHexBytes(const volatile uint8_t *bytes, size_t count)
{
    for (size_t pos = 0; pos != count; ++pos)
    {
        Serial.print(bytes[pos], 16);
        Serial.print(" ");
    }
    Serial.println();
}


//Debug console: blocks until a byte arrives on the USB serial port, consumes
//it, then dumps the current contents of _uartBuff and _timestampBuff in hex.
void loop()
{
    //Busy-wait for any key press from the host
    while (Serial.available() == 0)
    {
    }

    //Discard the byte that woke us up and start on a fresh line
    Serial.read();
    Serial.println();

    Serial.println("UART buffer contents:");
    printHexBytes(_uartBuff, sizeof(_uartBuff));

    Serial.println("timestamp buffer contents:");
    printHexBytes(_timestampBuff, sizeof(_timestampBuff));
}