Hey Manitou,
Since you did this for a Teensy 3.2, I thought I would do the same for the Teensy 3.6.

I am also including the sketch I used to generate the result. Note that I tried to adopt as much from the DMAChannel library as I could, but there are two lines that I had to comment out because they didn't function as I expected them to. The rise time is 463 ns and the fall time is 527 ns.
Code:
#include <DMAChannel.h>
// Number of 16-bit samples in the waveform table that the DMA streams to DAC0.
#define BUFFER_SIZE 32

// PDB period in bus clocks. For Teensy 3.6 the bus frequency is 60 MHz, and the
// PDB minimum for the DAC is 8 bus clocks.
// DAC update rate = MOD x 1/(busFrequency) seconds.
uint16_t MOD_Start = 8;

// Size value used by setup() when computing the DMA major-loop count.
uint16_t DAC0BufSize = 16;

// Waveform table read by the DMA engine (source buffer), 16-byte aligned.
static volatile uint16_t squarewave[BUFFER_SIZE] __attribute__((aligned(16)));

// DMA channel object used to feed DAC0. It is configured in setup() via
// begin(); the `false` argument presumably defers channel allocation until
// then -- confirm against the DMAChannel library.
DMAChannel *dma0 = new DMAChannel(false);
void setup() {
// Fill the table with two square waves. For BUFFER_SIZE = 32, that is one square wave for each DAC buffer
for(int i = 0; i < BUFFER_SIZE/4; i++) {
squarewave[i] = 0;
squarewave[i + BUFFER_SIZE / 4] = 4095;
squarewave[i + BUFFER_SIZE / 2] = 0;
squarewave[i + 3 * BUFFER_SIZE / 4] = 4095;
}
// Initialize DAC0
SIM_SCGC2 |= SIM_SCGC2_DAC0; // enable DAC clock
DAC0_C0 = DAC_C0_DACEN // DAC Control Register 0 enables enables DAC0. See Section 41.5.4.
| DAC_C0_DACRFS; // Use the 3.3V reference
// Slowly ramp up to mid-range voltage
for (int16_t i = 0; i < 2048; i += 1) {
*(int16_t *)&(DAC0_DAT0L) = i;
delay(1);
}
// Fill up the DAC0 buffer with a downward ramp beginning at 2048
using aliased_uint16 = uint16_t __attribute__((__may_alias__));
using aliased_uint16_vptr = volatile aliased_uint16*;
for (size_t i=0; i<16; i+=1) {
((aliased_uint16_vptr) &DAC0_DAT0L)[i] = 2048-(i*127); //256*(16-i) - 1;
}
// Channel Configuration Register is used to enable one of the DMA channels to one of the DMA slots on the system
// Need to configure the DMA to align with feeding the DAC output channel.
// The DMA Multiplexer allows up to 63 DMA request signals to map to any of the 32 DMA channels. Map DAC as a request source.
// Initialize the DMA
// We are using Channel 0 for driving the DAC.
// Use the System Clock Gating Control Register to initialize and enable the DMA MUX and the DMA clock.
// The DMA multiplexer (DMAMUX) routes DMA sources, called slots, to any of 32 DMA channels.
dma0->begin(true); // Enable requests on DMA channel 0 - See Section 24.3.3
dma0->TCD->SADDR = squarewave; // Set the address of the first byte in the DMA output buffer as the source address. See Section 24.3.18
dma0->TCD->DADDR = &DAC0_DAT0L; // Set the first data register of DAC0 as the destination address. See Section 41.5.
dma0->TCD->SOFF = 4; // advance 32 bits, or 4 bytes per read
dma0->TCD->DOFF = 4; // advance 32 bits, or 4 bytes per read
dma0->TCD->ATTR = DMA_TCD_ATTR_SSIZE(DMA_TCD_ATTR_SIZE_32BIT); // See Section 24.3.20
// dma0->TCD->ATTR = DMA_TCD_ATTR_DSIZE(DMA_TCD_ATTR_SIZE_32BIT); // This doesn't work
// dma0->TCD->ATTR = DMA_TCD_ATTR_DMOD(31 - __builtin_clz(32)); // This doesn't work
DMA_TCD0_ATTR |= DMA_TCD_ATTR_DSIZE(DMA_TCD_ATTR_SIZE_32BIT)
| DMA_TCD_ATTR_DMOD(31 - __builtin_clz(32)); // set the data transfer size to 32 bit for both the source and the destination
// Set the number of bytes transferred per minor loop, or request. See Section 24.3.21
dma0->TCD->NBYTES = 16; //We want to fill half of the DAC buffer, which is 16 words in total, so we need 8 words - or 16 bytes - per transfer
dma0->TCD->SLAST = -2 * BUFFER_SIZE; //Set the size of the DMA transfer (major loop)
dma0->TCD->DLASTSGA = 0;
dma0->TCD->BITER = 2 * BUFFER_SIZE / DAC0BufSize; // CITER value is volatile.
dma0->TCD->CITER = 2 * BUFFER_SIZE / DAC0BufSize; // BITER value is loaded back into the CITER field at the end of the buffer transfer.
dma0->TCD->CSR = 0;
dma0->triggerAtHardwareEvent(DMAMUX_SOURCE_DAC0); //Select DAC as request source #45. See Section 23.1.1. Select DMA Channel 0 as the point of control.
dma0->enable();
// Enable the DAC interrupts and use them to drive the DMA requests
DAC0_C0 |= DAC_C0_DACBWIEN // enable DMA trigger at watermark
| DAC_C0_DACBTIEN; // enable DMA trigger at at top of buffer (when it toggles from 15 to 0)
DAC0_C1 |= DAC_C1_DACBFWM(3) // watermark for DMA trigger
| DAC_C1_DMAEN // Enable the DMA request - See Section 41.5.5
| DAC_C1_DACBFEN ; // Enable the DAC Buffer
DAC0_C2 |= DAC_C2_DACBFRP(0); // Keeps the current value of the buffer read pointer - See Sction 41.5.6
DAC0_C2 |= DAC_C2_DACBFUP(15);
DAC0_SR &= ~(DAC_SR_DACBFWMF); // clear watermark flag
DAC0_SR &= ~(DAC_SR_DACBFRTF); // clear top pos flag
DAC0_SR &= ~(DAC_SR_DACBFRBF); // clear bottom pos flag
DAC0_C2 |= DAC_C2_DACBFRP(12); //Initial condition required for the DAC buffer read pointer
// Set up the PDB
SIM_SCGC6 |= SIM_SCGC6_PDB; // turn on the PDB clock
PDB0_SC |= PDB_SC_PDBEN; // enable the PDB
PDB0_SC |= PDB_SC_TRGSEL(15); // trigger the PDB on software start (SWTRIG)
PDB0_SC |= PDB_SC_CONT; // run in continuous mode
PDB0_MOD = (uint16_t)(MOD_Start - 1);
PDB0_DACINT0 = (uint16_t)(MOD_Start - 1);
PDB0_DACINTC0 |= PDB_DACINTC_TOE; // enable the DAC interval trigger. - See Section 44.4.10
PDB0_SC |= PDB_SC_LDOK; // update pdb registers
PDB0_SC |= PDB_SC_SWTRIG; // ...and start the PDB
}
// Main loop: intentionally empty. setup() has already configured the DAC, DMA
// and PDB, so nothing is required here. Add whatever application code you want.
void loop()
{
}