VGA output via FlexIO (Teensy4)

jmarsh

Well-known member
Even though it took using all 8 timer registers, it turns out it is possible to use one FlexIO module (FlexIO2 in this case) to generate VSYNC, HSYNC and drive 4 data lines to get 16-color VGA output. All the framebuffer data is fetched using DMA (one channel for regular mode, three channels for line-doubling) leaving the CPU free for other tasks. Mono/1bpp mode is also supported if low memory usage is required, using two shift buffers as state registers to broadcast the single pixel bit to all four output pins.

Code:
#include <DMAChannel.h>

/* R2R ladder:
 *
 * GROUND <------------- 536R ----*---- 270R ---*-----------> VGA PIN: R=1/G=2/B=3
 *                                |             |
 * INTENSITY (13) <---536R -------/             |
 *                                              |
 * COLOR: R=11/G=12/B=10  <-----536R------------/
 *
 * VSYNC (34) <---------------68R---------------------------> VGA PIN 14
 *
 * HSYNC (35) <---------------68R---------------------------> VGA PIN 13
 */

// Describes one video mode: active area, blanking intervals, pixel clock
// ratio and sync polarities.
// Horizontal values must be divisible by 8 for correct operation.
struct vga_timing {
  // vertical: active lines, front porch, sync width, back porch
  uint32_t height, vfp, vsw, vbp;
  // horizontal: active pixels, front porch, sync width, back porch
  uint32_t width, hfp, hsw, hbp;
  // pixel clock ratio as numerator / denominator (consumed by set_clk)
  uint32_t clk_num, clk_den;
  // sync polarities: 0 = active high, 1 = active low
  uint32_t vsync_pol, hsync_pol;
};

// VGA signal generator built on the FlexIO2 module. The constructor programs
// the clock, timers and shifters for the given mode and starts output;
// pixel data is fetched from the framebuffer by DMA.
class FlexIO2VGA {
public:
  // mode:        timing description of the video mode to generate
  // half_height: when true each framebuffer line is sent twice (line doubling)
  // half_width:  when true each pixel is held twice as long
  // bpp:         bits per pixel; the implementation handles 4 and treats
  //              anything else as 1
  FlexIO2VGA(const vga_timing& mode, bool half_height=false, bool half_width=false, unsigned int bpp=4);
  // Disables the FlexIO2 interrupt, the FlexIO2 module and the line DMA channels.
  void stop(void);

  // Registers the framebuffer to display starting with the next frame.
  // source: first byte of the first line; pitch: bytes between line starts.
  // wait parameter:
  // TRUE =  wait until previous frame ends and source is "current"
  // FALSE = queue it as the next framebuffer to draw, return immediately
  void set_next_buffer(const void* source, size_t pitch, bool wait);

  // Blocks (calling yield()) until the frame counter changes, i.e. until the
  // next frame interrupt has been serviced.
  void wait_for_frame(void) {
    unsigned int count = frameCount;
    while (count == frameCount)
      yield();
  }
private:
  // Programs the video PLL for the ratio num/den and routes it to FlexIO2.
  void set_clk(int num, int den);
  // Raw interrupt vector; forwards to TimerInterrupt() of the global instance.
  static void ISR(void);
  // Per-frame handler: re-arms the DMA channel(s) and bumps frameCount.
  void TimerInterrupt(void);

  // channel numbers fed to DMA_SERQ by dmaswitcher (line-doubling mode)
  uint8_t dma_chans[2];
  DMAChannel dma1,dma2,dmaswitcher;
  // template TCD copied into dma1/dma2 at the start of every frame
  DMASetting dma_params;

  bool double_height;
  // number of framebuffer bits consumed per scanline
  int32_t widthxbpp;
  
  // incremented once per frame from interrupt context
  volatile unsigned int frameCount;
};

// Configures pins, DMA descriptors, the video clock and all eight FlexIO2
// timers plus the shifters for the requested mode, then starts output.
//
// mode:        timing of the video mode (horizontal values divisible by 8)
// half_height: send every framebuffer line twice (uses dma1/dma2 alternately)
// half_width:  hold every pixel for twice as many clocks
// bpp:         4 selects 16-color output; any other value takes the 1bpp path
//
// No framebuffer is shown until set_next_buffer() supplies one: the template
// TCD is zeroed here, and the frame handler skips DMA setup while SADDR is 0.
FLASHMEM FlexIO2VGA::FlexIO2VGA(const vga_timing& mode, bool half_height, bool half_width, unsigned int bpp) {
  frameCount = 0;
  *(portConfigRegister(11)) = 4; // FLEXIO2_D2    RED
  *(portConfigRegister(12)) = 4; // FLEXIO2_D1    GREEN
  *(portConfigRegister(10)) = 4; // FLEXIO2_D0    BLUE
  *(portConfigRegister(13)) = 4; // FLEXIO2_D3    INTENSITY
  *(portConfigRegister(34)) = 4; // FLEXIO2_D29   VSYNC
  *(portConfigRegister(35)) = 4; // FLEXIO2_D28   HSYNC

  // order matters: dmaswitcher reads this table to decide which channel to
  // enable next, starting with dma2 after dma1 has sent the first line
  dma_chans[0] = dma2.channel;
  dma_chans[1] = dma1.channel;

  // template descriptor: each request moves 8 bytes into SHIFTBUF0/SHIFTBUF1
  // (destination advances by 4 and wraps on an 8 byte boundary)
  memset(dma_params.TCD, 0, sizeof(*dma_params.TCD));
  dma_params.TCD->DOFF = 4;
  dma_params.TCD->ATTR = DMA_TCD_ATTR_DMOD(3) | DMA_TCD_ATTR_DSIZE(2);
  dma_params.TCD->NBYTES = 8;
  dma_params.TCD->DADDR = &FLEXIO2_SHIFTBUF0;
  dma1.triggerAtHardwareEvent(DMAMUX_SOURCE_FLEXIO2_REQUEST0);
  dma2.triggerAtHardwareEvent(DMAMUX_SOURCE_FLEXIO2_REQUEST0);

  // switcher: copies one byte of dma_chans[] (source wraps every 2 bytes)
  // into DMA_SERQ each time it is triggered
  dmaswitcher.TCD->SADDR = dma_chans;
  dmaswitcher.TCD->SOFF = 1;
  dmaswitcher.TCD->DADDR = &DMA_SERQ;
  dmaswitcher.TCD->DOFF = 0;
  dmaswitcher.TCD->ATTR = DMA_TCD_ATTR_SMOD(1);
  dmaswitcher.TCD->NBYTES = 1;
  dmaswitcher.TCD->BITER = dmaswitcher.TCD->CITER = 1;

  double_height = half_height;
  widthxbpp = (mode.width * bpp) / (half_width ? 2 : 1);

  // FlexIO runs at 4x the pixel clock
  set_clk(4*mode.clk_num, mode.clk_den);

  FLEXIO2_CTRL = FLEXIO_CTRL_SWRST;
  asm volatile("dsb");
  FLEXIO2_CTRL = FLEXIO_CTRL_FASTACC | FLEXIO_CTRL_FLEXEN;
  // wait for reset to clear
  while (FLEXIO2_CTRL & FLEXIO_CTRL_SWRST);

  // timer 0: divide pixel clock by 8
  FLEXIO2_TIMCFG0 = 0;
  FLEXIO2_TIMCMP0 = (4*8)-1;

  // timer 1: generate HSYNC
  FLEXIO2_TIMCFG1 = FLEXIO_TIMCFG_TIMDEC(1);
  // on = HSW, off = rest of line
  FLEXIO2_TIMCMP1 = ((((mode.width+mode.hbp+mode.hfp)/8)-1)<<8) | ((mode.hsw/8)-1);
  // trigger = timer0, HSYNC=D28
  FLEXIO2_TIMCTL1 = FLEXIO_TIMCTL_TRGSEL(4*0+3) | FLEXIO_TIMCTL_TRGSRC | FLEXIO_TIMCTL_PINCFG(3) | FLEXIO_TIMCTL_PINSEL(28) | FLEXIO_TIMCTL_TIMOD(2) | (mode.hsync_pol*FLEXIO_TIMCTL_PINPOL);

  // timer 2: frame counter
  // tick on HSYNC
  FLEXIO2_TIMCFG2 = FLEXIO_TIMCFG_TIMDEC(1);
  FLEXIO2_TIMCMP2 = ((mode.height+mode.vbp+mode.vfp+mode.vsw)*2)-1;
  // trigger = HSYNC pin
  FLEXIO2_TIMCTL2 = FLEXIO_TIMCTL_TRGSEL(2*28) | (mode.hsync_pol * FLEXIO_TIMCTL_TRGPOL) | FLEXIO_TIMCTL_TRGSRC | FLEXIO_TIMCTL_TIMOD(3);

  // timer 3: generate VSYNC
  FLEXIO2_TIMCFG3 = FLEXIO_TIMCFG_TIMDIS(2) | FLEXIO_TIMCFG_TIMENA(7);
  // active for VSW lines. 4*total horizontal pixels*vertical sync length must be <= 65536 to not overflow this timer
  FLEXIO2_TIMCMP3 = (4*mode.vsw*(mode.width+mode.hbp+mode.hsw+mode.hfp))-1;
  // trigger = frame counter, VSYNC=D29
  FLEXIO2_TIMCTL3 = FLEXIO_TIMCTL_TRGSEL(4*2+3) | FLEXIO_TIMCTL_TRGSRC | FLEXIO_TIMCTL_PINCFG(3) | FLEXIO_TIMCTL_PINSEL(29) | FLEXIO_TIMCTL_TIMOD(3) | (mode.vsync_pol*FLEXIO_TIMCTL_PINPOL);

  // timer4: count VSYNC and back porch
  // enable on VSYNC start, disable after (VSW+VBP)*2 edges of HSYNC
  FLEXIO2_TIMCFG4 = FLEXIO_TIMCFG_TIMDEC(2) | FLEXIO_TIMCFG_TIMDIS(2) | FLEXIO_TIMCFG_TIMENA(6);
  FLEXIO2_TIMCMP4 = ((mode.vsw+mode.vbp)*2)-1;
  // trigger = VSYNC pin, pin = HSYNC
  FLEXIO2_TIMCTL4 = FLEXIO_TIMCTL_TRGSEL(2*29) | FLEXIO_TIMCTL_TRGSRC | FLEXIO_TIMCTL_PINSEL(28) | FLEXIO_TIMCTL_TIMOD(3) | (mode.vsync_pol*FLEXIO_TIMCTL_TRGPOL) | (mode.hsync_pol*FLEXIO_TIMCTL_PINPOL);

  // timer 5: vertical active region
  // enable when previous timer finishes, disable after height*2 edges of HSYNC
  FLEXIO2_TIMCFG5 = FLEXIO_TIMCFG_TIMDEC(2) | FLEXIO_TIMCFG_TIMDIS(2) | FLEXIO_TIMCFG_TIMENA(6);
  FLEXIO2_TIMCMP5 = (mode.height*2)-1;
  // trigger = timer4 negative, pin = HSYNC
  // (the pin is HSYNC, so its polarity comes from hsync_pol, same as timer 4)
  FLEXIO2_TIMCTL5 = FLEXIO_TIMCTL_TRGSEL(4*4+3) | FLEXIO_TIMCTL_TRGPOL | FLEXIO_TIMCTL_TRGSRC | FLEXIO_TIMCTL_PINSEL(28) | FLEXIO_TIMCTL_TIMOD(3) | (mode.hsync_pol*FLEXIO_TIMCTL_PINPOL);

  // timer 6: horizontal active region
  // configured as PWM: OFF for HSYNC+HBP, ON for active region, reset (to off state) when HSYNC occurs (off state covers HFP then resets)
  FLEXIO2_TIMCFG6 = FLEXIO_TIMCFG_TIMOUT(1) | FLEXIO_TIMCFG_TIMDEC(1) | FLEXIO_TIMCFG_TIMRST(4) | FLEXIO_TIMCFG_TIMDIS(1) | FLEXIO_TIMCFG_TIMENA(1);
  FLEXIO2_TIMCMP6 = ((((mode.hsw+mode.hbp)/8)-1)<<8) | ((mode.width/8)-1);
  // trigger = timer0, pin = HSYNC
  FLEXIO2_TIMCTL6 = FLEXIO_TIMCTL_TRGSEL(4*0+3) | FLEXIO_TIMCTL_TRGSRC | FLEXIO_TIMCTL_PINSEL(28) | FLEXIO_TIMCTL_TIMOD(2) | (mode.hsync_pol*FLEXIO_TIMCTL_PINPOL);

  // timer 7: output pixels from shifter, runs only when trigger is ON
  FLEXIO2_TIMCFG7 = FLEXIO_TIMCFG_TIMDIS(6) | FLEXIO_TIMCFG_TIMENA(6) | FLEXIO_TIMCFG_TSTOP(2);
  // upper byte: 64 bits of shifter data per reload, expressed in pixels (64/bpp);
  // lower byte: FlexIO clocks per pixel (doubled for half_width)
  FLEXIO2_TIMCMP7 = ((((64/bpp)*2)-1)<<8) | ((half_width ? 4:2)-1);
  // trigger = timer 6
  FLEXIO2_TIMCTL7 = FLEXIO_TIMCTL_TRGSEL(4*6+3) | FLEXIO_TIMCTL_TRGSRC | FLEXIO_TIMCTL_TIMOD(1);

  // start blank
  FLEXIO2_SHIFTBUF1 = FLEXIO2_SHIFTBUF2 = 0;
  if (bpp == 4) {
    // shifter 1 feeds shifter 0, which drives 4 pins starting at D0
    FLEXIO2_SHIFTCFG1 = FLEXIO_SHIFTCFG_PWIDTH(3);
    FLEXIO2_SHIFTCTL1 = FLEXIO_SHIFTCTL_TIMSEL(7) | FLEXIO_SHIFTCTL_SMOD(2);
    // output stop bit when timer disables - ensures black output/zero outside active window
    FLEXIO2_SHIFTCFG0 = FLEXIO_SHIFTCFG_PWIDTH(3) | FLEXIO_SHIFTCFG_INSRC | FLEXIO_SHIFTCFG_SSTOP(2);
    FLEXIO2_SHIFTCTL0 = FLEXIO_SHIFTCTL_TIMSEL(7) | FLEXIO_SHIFTCTL_PINCFG(3) | FLEXIO_SHIFTCTL_PINSEL(0) | FLEXIO_SHIFTCTL_SMOD(2);

    FLEXIO2_SHIFTSTATE = 0;
  } else { // bpp==1
    // shifter 0 drives the single pixel bit onto D8; shifters 2 and 3 act as
    // state registers that copy it to the four color pins
    FLEXIO2_SHIFTCFG1 = FLEXIO_SHIFTCFG_PWIDTH(0);
    FLEXIO2_SHIFTCTL1 = FLEXIO_SHIFTCTL_TIMSEL(7) | FLEXIO_SHIFTCTL_SMOD(2);
    FLEXIO2_SHIFTCFG0 = FLEXIO_SHIFTCFG_PWIDTH(0) | FLEXIO_SHIFTCFG_INSRC | FLEXIO_SHIFTCFG_SSTOP(2);
    FLEXIO2_SHIFTCTL0 = FLEXIO_SHIFTCTL_TIMSEL(7)  | FLEXIO_SHIFTCTL_PINCFG(3) | FLEXIO_SHIFTCTL_PINSEL(8) | FLEXIO_SHIFTCTL_SMOD(2);

    // D8 clear = use state 2, D8 set = use state 3
    // note that PWIDTH does not seem to mask D4-7 outputs as documented!
    FLEXIO2_SHIFTBUF2 = 0x0069A69A;
    FLEXIO2_SHIFTCFG2 = FLEXIO_SHIFTCFG_PWIDTH(15);
    FLEXIO2_SHIFTCTL2 = FLEXIO_SHIFTCTL_TIMSEL(7) | FLEXIO_SHIFTCTL_PINCFG(3) | FLEXIO_SHIFTCTL_PINSEL(8) | FLEXIO_SHIFTCTL_SMOD(6) | FLEXIO_SHIFTCTL_TIMPOL;
    FLEXIO2_SHIFTBUF3 = 0x0F69A69A;
    FLEXIO2_SHIFTCFG3 = FLEXIO_SHIFTCFG_PWIDTH(15);
    FLEXIO2_SHIFTCTL3 = FLEXIO_SHIFTCTL_TIMSEL(7) | FLEXIO_SHIFTCTL_PINCFG(3) | FLEXIO_SHIFTCTL_PINSEL(8) | FLEXIO_SHIFTCTL_SMOD(6) | FLEXIO_SHIFTCTL_TIMPOL;

    FLEXIO2_SHIFTSTATE = 2;
  }

  // clear timer 5 status
  FLEXIO2_TIMSTAT = 1<<5;
  // make sure no other FlexIO interrupts are enabled
  FLEXIO2_SHIFTSIEN = 0;
  FLEXIO2_SHIFTEIEN = 0;
  // enable timer 5 interrupt
  FLEXIO2_TIMIEN = 1<<5;

  attachInterruptVector(IRQ_FLEXIO2, ISR);
  NVIC_SET_PRIORITY(IRQ_FLEXIO2, 32);
  NVIC_ENABLE_IRQ(IRQ_FLEXIO2);

  // start everything!
  // timer 0 is the root of the timer chain, so it is enabled last
  FLEXIO2_TIMCTL0 = FLEXIO_TIMCTL_TIMOD(3);
}

// Halts video output: masks the FlexIO2 interrupt, disables the FlexIO2
// module together with its timer interrupt and shifter DMA requests (only
// when the module is clocked), then disables both line DMA channels.
FLASHMEM void FlexIO2VGA::stop(void) {
  // mask the interrupt first so the frame handler cannot re-arm DMA while we tear down
  NVIC_DISABLE_IRQ(IRQ_FLEXIO2);
  // FlexIO2 registers don't work if they have no clock
  if (CCM_CCGR3 & CCM_CCGR3_FLEXIO2(CCM_CCGR_ON)) {
    FLEXIO2_CTRL &= ~FLEXIO_CTRL_FLEXEN;
    FLEXIO2_TIMIEN = 0;
    FLEXIO2_SHIFTSDEN = 0;
  }
  dma1.disable();
  dma2.disable();
  // make sure the register writes above have completed before returning
  asm volatile("dsb");
}

// Programs the video PLL (PLL5) to the ratio num/den and selects it as the
// undivided FlexIO2 clock source.
//
// num, den: requested clock ratio. The ratio is doubled until it reaches the
//           PLL's minimum multiplier (27); each doubling is undone by one
//           step of post-division.
// NOTE(review): only post_divide values 0..4 are mapped to divider settings
// below, and non-positive num/den are not checked - callers must pass a ratio
// that lands in that range.
FLASHMEM void FlexIO2VGA::set_clk(int num, int den) {
  int post_divide = 0;
  // scale the ratio up into the PLL's range, counting the doublings
  while (num < 27*den) num <<= 1, ++post_divide;
  // split into integer multiplier and fractional remainder (num/den)
  int div_select = num / den;
  num -= div_select * den;

  // valid range for div_select: 27-54

  // switch video PLL to bypass, enable, set div_select
  CCM_ANALOG_PLL_VIDEO = CCM_ANALOG_PLL_VIDEO_BYPASS | CCM_ANALOG_PLL_VIDEO_ENABLE | CCM_ANALOG_PLL_VIDEO_DIV_SELECT(div_select);
  // clear misc2 vid post-divider
  CCM_ANALOG_MISC2_CLR = CCM_ANALOG_MISC2_VIDEO_DIV(3);
  // the write above left POST_DIV_SELECT at 0 (div by 4); adjust from there
  switch (post_divide) {
      case 0: // div by 1
        CCM_ANALOG_PLL_VIDEO_SET = CCM_ANALOG_PLL_VIDEO_POST_DIV_SELECT(2);
        break;
      case 1: // div by 2
        CCM_ANALOG_PLL_VIDEO_SET = CCM_ANALOG_PLL_VIDEO_POST_DIV_SELECT(1);
        break;
      // div by 4
      // case 2: PLL_VIDEO post_div_select already set to 0
      case 3: // div by 8 (4*2)
        CCM_ANALOG_MISC2_SET = CCM_ANALOG_MISC2_VIDEO_DIV(1);
        break;
      case 4: // div by 16 (4*4)
        CCM_ANALOG_MISC2_SET = CCM_ANALOG_MISC2_VIDEO_DIV(3);
        break;
  }
  CCM_ANALOG_PLL_VIDEO_NUM = num;
  CCM_ANALOG_PLL_VIDEO_DENOM = den;
  // ensure PLL is powered
  CCM_ANALOG_PLL_VIDEO_CLR = CCM_ANALOG_PLL_VIDEO_POWERDOWN;
  // wait for lock
  while (!(CCM_ANALOG_PLL_VIDEO & CCM_ANALOG_PLL_VIDEO_LOCK));
  // deactivate bypass
  CCM_ANALOG_PLL_VIDEO_CLR = CCM_ANALOG_PLL_VIDEO_BYPASS;

  // gate clock
  // (the FlexIO2 clock is switched off while its source and dividers change)
  CCM_CCGR3 &= ~CCM_CCGR3_FLEXIO2(CCM_CCGR_ON);
  // FlexIO2 use vid clock (PLL5)
  uint32_t t = CCM_CSCMR2;
  t &= ~CCM_CSCMR2_FLEXIO2_CLK_SEL(3);
  t |= CCM_CSCMR2_FLEXIO2_CLK_SEL(2);
  CCM_CSCMR2 = t;
  // flex gets 1:1 clock, no dividing
  CCM_CS1CDR &= ~(CCM_CS1CDR_FLEXIO2_CLK_PODF(7) | CCM_CS1CDR_FLEXIO2_CLK_PRED(7));
  asm volatile("dsb");
  // disable clock gate
  CCM_CCGR3 |= CCM_CCGR3_FLEXIO2(CCM_CCGR_ON);
}

// Frame handler, called from ISR() when timer 5 raises its status flag.
// Reloads the DMA channel(s) from the template descriptor so the next frame
// starts at the currently registered framebuffer, then bumps frameCount
// (which releases wait_for_frame()).
void FlexIO2VGA::TimerInterrupt(void) {
  // SADDR stays 0 until set_next_buffer() has been called; nothing to show before that
  if (dma_params.TCD->SADDR) {
    dma1 = dma_params;
    if (double_height) {
      // line doubling: dma1 and dma2 carry identical descriptors and take
      // turns. Each one stops after its line and kicks dmaswitcher, which
      // writes the other channel's number to DMA_SERQ (presumably enabling it).
      dma1.disableOnCompletion();
      dmaswitcher.triggerAtCompletionOf(dma1);

      dma2 = dma_params;
      dma2.disableOnCompletion();
      dmaswitcher.triggerAtCompletionOf(dma2);
    }
    dma1.enable();
    // push first pixels into shiftbuf registers
    dma1.triggerManual();
    // let shifter 0 raise further DMA requests
    FLEXIO2_SHIFTSDEN = 1<<0;
  }
  frameCount++;
}

void FlexIO2VGA::set_next_buffer(const void* source, size_t pitch, bool wait) {
  // find worst alignment combo of source and pitch
  size_t log_read;
  switch (((size_t)source | pitch) & 7) {
    case 0: // 8 byte alignment
      log_read = 3;
      break;
    case 2: // 2 byte alignment
    case 6:
      log_read = 1;
      break;
    case 4: // 4 byte alignment
      log_read = 2;
      break;
    default: // 1 byte alignment, this will be slow...
      log_read = 0;
  }
  uint16_t major = (widthxbpp+63)/64;
  dma_params.TCD->SOFF = 1 << log_read;
  dma_params.TCD->ATTR_SRC = log_read;
  dma_params.TCD->SADDR = source;
  dma_params.TCD->SLAST = pitch - (major*8);
  dma_params.TCD->CITER = dma_params.TCD->BITER = major;
  if (wait)
    wait_for_frame();
}

extern FlexIO2VGA FLEXIOVGA;
void FlexIO2VGA::ISR(void) {
  uint32_t timStatus = FLEXIO2_TIMSTAT & 0xFF;
  FLEXIO2_TIMSTAT = timStatus;

  if (timStatus & (1<<5)) {
    FLEXIOVGA.TimerInterrupt();
  }

  asm volatile("dsb");
}

/* END VGA driver code */

// Demo renderer: draws a filled circle on a solid background into fb and
// advances the circle one step, bouncing it off the frame edges. Colors
// change every time an edge is hit.
//
// fb:     destination framebuffer
// height: number of lines to draw
// width:  number of pixels per line
// bpp:    1 packs 8 pixels per byte (bit 0 = leftmost); any other value
//         packs 2 pixels per byte (low nibble = left pixel)
// pitch:  bytes between the starts of consecutive lines
static void FillFrameBuffer(uint8_t *fb, int height, int width, int bpp, size_t pitch) {
  const int radius = height/6 - 10;
  const int radiusSq = radius*radius;

  // animation state kept between calls; the start position is taken from
  // the radius of the very first call
  static int ballX = radius;
  static int ballY = radius;
  static int stepX = 4;
  static int stepY = 2;
  static uint8_t bgCounter = 8;
  static uint8_t fgColor = 8;

  // reflect off each edge, clamping the center onto the limit
  const int maxX = width - radius;
  const int maxY = height - radius;
  bool bounced = false;
  if (ballX >= maxX) {
    ballX = maxX;
    stepX = -stepX;
    bounced = true;
  }
  if (ballX <= radius) {
    ballX = radius;
    stepX = -stepX;
    bounced = true;
  }
  if (ballY >= maxY) {
    ballY = maxY;
    stepY = -stepY;
    bounced = true;
  }
  if (ballY <= radius) {
    ballY = radius;
    stepY = -stepY;
    bounced = true;
  }

  if (bounced) {
    // foreground cycles through 8..15, background counter through 0..23
    ++fgColor;
    if (fgColor == 16)
      fgColor = 8;
    ++bgCounter;
    if (bgCounter == 24)
      bgCounter = 0;
  }

  // background color advances once every three bounces
  const uint8_t bgColor = bgCounter / 3;

  for (int row = 0; row < height; ++row) {
    uint8_t *dst = fb + row*pitch;
    const int dy = row - ballY;
    const int dySq = dy*dy;

    int col = 0;
    while (col < width) {
      uint8_t packed = 0;
      if (bpp == 1) {
        for (int bit = 0; bit < 8; ++bit) {
          const int dx = col - ballX + bit;
          if (dx*dx + dySq <= radiusSq)
            packed |= 1 << bit;
        }
        col += 8;
      } else { // bpp=4
        const int dxLeft = col - ballX;
        const int dxRight = dxLeft + 1;
        const uint8_t left = (dxLeft*dxLeft + dySq <= radiusSq) ? fgColor : bgColor;
        const uint8_t right = (dxRight*dxRight + dySq <= radiusSq) ? fgColor : bgColor;
        packed = (uint8_t)((right << 4) | left);
        col += 2;
      }
      *dst++ = packed;
    }
  }

  ballX += stepX;
  ballY += stepY;
}

// Built-in video modes. Field meanings follow vga_timing; polarity fields
// use 0 = active high, 1 = active low. The totals quoted per mode are
// width+hfp+hsw+hbp pixels per line and height+vfp+vsw+vbp lines per frame.

// 1280x720 @ 60 Hz: 1648 pixels per line, 750 lines per frame
PROGMEM static const vga_timing t1280x720x60 = {
  .height=720, .vfp=13, .vsw=5, .vbp=12,
  .width=1280, .hfp=80, .hsw=40, .hbp=248,
  .clk_num=7425, .clk_den=2400, .vsync_pol=0, .hsync_pol=0
};

// 1024x768 @ 60 Hz: 1344 pixels per line, 806 lines per frame
PROGMEM static const vga_timing t1024x768x60 = {
  .height=768, .vfp=3, .vsw=6, .vbp=29,
  .width=1024, .hfp=24, .hsw=136, .hbp=160,
  .clk_num=65, .clk_den=24, .vsync_pol=1, .hsync_pol=1
};

// 800x600 @ 100 Hz: 1072 pixels per line, 636 lines per frame
PROGMEM static const vga_timing t800x600x100 = {
  .height=600, .vfp=1, .vsw=3, .vbp=32,
  .width=800, .hfp=48, .hsw=88, .hbp=136,
  .clk_num=6818, .clk_den=2400, .vsync_pol=0, .hsync_pol=1
};

// 800x600 @ 60 Hz: 1056 pixels per line, 628 lines per frame
PROGMEM static const vga_timing t800x600x60 = {
  .height=600, .vfp=1, .vsw=4, .vbp=23,
  .width=800, .hfp=40, .hsw=128, .hbp=88,
  .clk_num=40, .clk_den=24, .vsync_pol=0, .hsync_pol=0
};

// 640x480 @ 60 Hz: 800 pixels per line, 525 lines per frame
PROGMEM static const vga_timing t640x480x60 = {
  .height=480, .vfp=10, .vsw=2, .vbp=33,
  .width=640, .hfp=16, .hsw=96, .hbp=48,
  .clk_num=150, .clk_den=143, .vsync_pol=1, .hsync_pol=1
};

// 640x400 @ 70 Hz: 800 pixels per line, 449 lines per frame
PROGMEM static const vga_timing t640x400x70 = {
  .height=400, .vfp=12, .vsw=2, .vbp=35,
  .width=640, .hfp=16, .hsw=96, .hbp=48,
  .clk_num=150, .clk_den=143, .vsync_pol=0, .hsync_pol=1
};

// 640x350 @ 70 Hz: 800 pixels per line, 449 lines per frame
// (same line and frame totals as 640x400; only the active/blanking split
// and the sync polarities differ)
PROGMEM static const vga_timing t640x350x70 = {
  .height=350, .vfp=37, .vsw=2, .vbp=60,
  .width=640, .hfp=16, .hsw=96, .hbp=48,
  .clk_num=150, .clk_den=143, .vsync_pol=1, .hsync_pol=0
};

// memory restrictions
// MAX_WIDTH is in bytes per line: 1280 pixels at 4 bits per pixel.
#define MAX_WIDTH (1280/2)
#define MAX_HEIGHT 720
// extra bytes added to every line's pitch
#define STRIDE_PADDING 16

// Two framebuffers for double buffering, 721*656 = 472976 bytes each.
// The 1024x768 mode has more lines than MAX_HEIGHT but a shorter pitch
// (528 bytes), so its 768*528 = 405504 bytes still fit.
static uint8_t frameBuffer0[(MAX_HEIGHT+1)*(MAX_WIDTH+STRIDE_PADDING)];
DMAMEM static uint8_t frameBuffer1[(MAX_HEIGHT+1)*(MAX_WIDTH+STRIDE_PADDING)];
static uint8_t* const s_frameBuffer[2] = {frameBuffer0, frameBuffer1};

// currently selected mode; the driver instance starts in it with default
// options (no doubling, 4bpp)
const vga_timing *timing = &t640x400x70;
// global instance; FlexIO2VGA::ISR() dispatches to it by name
FlexIO2VGA FLEXIOVGA(*timing);

// Arduino entry point: only opens the serial port used by loop() for mode
// selection. Video output is already started by the global FLEXIOVGA object.
void setup() {
  Serial.begin(9600);
}

void loop() {
  static uint32_t frameBufferIndex;

  static int double_height = false;
  static int double_width = false;
  static int bpp = 4;

  int height = timing->height / (double_height ? 2:1);
  int width = timing->width / (double_width ? 2:1);
  size_t pitch = width*bpp/8 + STRIDE_PADDING;

  int c = Serial.read();
  if (c >= 0) {
    switch (c) {
      case '0':
        timing = &t1280x720x60;
        Serial.println("New mode: 1280x720x60");
        break;
      case '1':
        timing = &t1024x768x60;
        Serial.println("New mode: 1024x768x60");
        break;
      case '2':
        timing = &t800x600x100;
        Serial.println("New mode: 800x600x100");
        break;
      case '3':
        timing = &t800x600x60;
        Serial.println("New mode: 800x600x60");
        break;
      case '4':
        timing = &t640x480x60;
        Serial.println("New mode: 640x480x60");
        break;
      case '5':
        timing = &t640x400x70;
        Serial.println("New mode: 640x400x70");
        break;
      case '6':
        timing = &t640x350x70;
        Serial.println("New mode: 640x350x70");
        break;
      case 'h':
      case 'H':
        double_height = !double_height;
        Serial.print("Height doubling is ");
        Serial.println(double_height ? "ON" : "OFF");
        break;
      case 'w':
      case 'W':
        double_width = !double_width;
        Serial.print("Width doubling is ");
        Serial.println(double_width ? "ON" : "OFF");
        break;
      case 'b':
      case 'B':
        bpp = (bpp==1) ? 4:1;
        Serial.print("Using BPP = ");
        Serial.println(bpp);
        break;
      default:
        Serial.println("0: 1280x720x60");
        Serial.println("1: 1024x768x60");
        Serial.println("2: 800x600x100");
        Serial.println("3: 800x600x60");
        Serial.println("4: 640x480x60");
        Serial.println("5: 640x400x70");
        Serial.println("6: 640x350x70");
        Serial.println("H: Toggle height doubling");
        Serial.println("W: Toggle width doubling");
        Serial.println("B: Toggle bitdepth (4/1)");
      case '\n':
        return;
    }
    FLEXIOVGA.stop();
    FLEXIOVGA = FlexIO2VGA(*timing, double_height, double_width, bpp);
  }

  FillFrameBuffer(s_frameBuffer[frameBufferIndex], height, width, bpp, pitch);
  // DMAMEM framebuffer must be flushed from cache
  if (frameBufferIndex)
    arm_dcache_flush_delete(s_frameBuffer[frameBufferIndex], height*pitch);

  FLEXIOVGA.set_next_buffer(s_frameBuffer[frameBufferIndex], pitch, true);
  frameBufferIndex ^= 1;
}
 
@jmarsh - I am building the VGA R2R ladder for use with this sketch and wanted to make sure I understand the schematic.
This is my version of it only expanded:
Code:
/* R2R ladder:
 *
 * GROUND <------------- 536R ----*---- 270R -----*---------> VGA PIN: Red=1
 *                                |               |
 * INTENSITY (13) <---536R -------/               |
 *                                                |
 * T4-11 <---536R --------------------------------/
 *
 * GROUND <------------- 536R ----*---- 270R -----*---------> VGA PIN: Green=2
 *                                |               |
 * INTENSITY (13) <---536R -------/               |
 *                                                |
 * T4-12 <---536R --------------------------------/
 *
 * GROUND <------------- 536R ----*---- 270R -----*---------> VGA PIN: Blue=3
 *                                |               |
 * INTENSITY (13) <---536R -------/               |
 *                                                |
 * T4-10 <---536R --------------------------------/
 *
 * VSYNC (34) <---------------68R---------------------------> VGA PIN 14 T4=34
 *
 * HSYNC (35) <---------------68R---------------------------> VGA PIN 13 T4=35
 */

Is this correct?

Thanks
 
Well it's working good...

I see the bouncing ball and background changing color when the ball hits the wall. Good quality picture even with my sloppy bread boarding. Need to find or make an adapter board.

Now to start learning about FlexIO...
 
Great to hear that it works for someone else.
FlexIO can definitely be a rabbit hole. The code here isn't what I would call very well documented so I'm happy to answer questions; at some point I want to strip it down to only 1bpp mode and turn it into proper terminal output, giving the Teensy an easy "hostless" debug output using only 3 pins (HSYNC, VSYNC, mono color to R+G+B).
 
Great to hear that it works for someone else.
FlexIO can definitely be a rabbit hole. The code here isn't what I would call very well documented so I'm happy to answer questions; at some point I want to strip it down to only 1bpp mode and turn it into proper terminal output, giving the Teensy an easy "hostless" debug output using only 3 pins (HSYNC, VSYNC, mono color to R+G+B).

Thanks, Been playing with it all day and I think I have a basic understanding of how it works. Next step is setting up for a text terminal and simple graphics. I am going to only use 640x400x70 16 color mode. This leaves about:
Code:
Memory Usage on Teensy 4.1:
  FLASH: code:14216, data:4376, headers:9052   free for files:8098820
   RAM1: variables:5376, code:10392, padding:22376   free for local variables:486144
   RAM2: variables:281888  free for malloc/new:242400
of memory which is plenty enough. Figured out the 8 colors plus intensity and how you are writing to the frame buffer using double buffering.
At my slow pace this should keep me busy for a while:) Also was playing with the Parallax VGA+PS2 mouse and keyboard adapter. It is a 6bit adapter...
 
Last edited:
That's pretty much the exact same size terminal I was going to use, to try out a trick I had in mind:
- allocate 128k (2^17) bytes for the framebuffer and align it to a 128k boundary
- set SMOD in the DMA descriptor to 17, so the upper 15 source address bits stay constant (meaning the DMA source will loop over the 128k framebuffer range if it overruns)
- do easy vertical scrolling just by changing the initial starting address of the DMA transfer, no need to move all the pixel data into the rows above when the terminal scrolls onto a new line

The only tricky part would be getting the framebuffer aligned correctly without wasting space, I haven't figured that out yet.
 
@jmarsh - I have been slowly picking at displaying text with your VGA driver. Below is a modified version of your original sketch (attached below). It's just a minimal messy version at this time. I am using the 640x400x70 timing. The biggest challenge was figuring out how to calculate setting both nibbles in one frame buffer byte for each of the 640 pixels. Basically unpacking each byte that represents two pixels, setting the correct nibble color and then repacking the byte:
Code:
// ------------------------------------------------------
// drawPixel()
// x and y are selected pixel coords.
// fg is foreground color.
// ------------------------------------------------------
// Sets one 4-bit pixel in the current framebuffer. Two pixels share a byte:
// even x uses the lower nibble, odd x the upper nibble. The other pixel in
// the byte is preserved.
static void drawPixel(int16_t x, int16_t y, uint8_t fg) {
  uint8_t *fbuff = s_frameBuffer[frameBufferIndex];

  // a color is 4 bits wide; drop anything above so an out-of-range value
  // cannot spill into the neighboring pixel's nibble
  fg &= 0x0f;

  uint8_t c = getPixel(x/2,y); // Get current 4 bit pixel pair.
  c = (x & 1) ? c & 0x0f : c & 0xf0; // Clear active nibble.
  // Or in nibble color. Shift to upper nibble if x is an odd number.
  c |= (x & 1) ? (fg << 4) : fg;
  fbuff[(y*pitch)+(x/2)] = c;
}

The x coord is checked for bit #0 being 0 or 1 and if it is 1 then we shift to the upper nibble then or in the foreground color. Otherwise we or in the foreground color into the lower nibble. The nibble that is not target nibble is preserved. This means that 640 pixels are packed into 320 bytes so x is divided by two to access 640 pixels. This was the biggest challenge for me:)
The "drawText()" and "drawRect()" functions use "drawPixel()".

I will probably create a base class for the VGA driver, then maybe a terminal class and a graphics class? I have been testing using the font file found in the uVGA library.
Any suggestions or ideas are welcome.

As others would say "Now back to playing"...

View attachment VGA_T4_Text.zip
 
I don't have a board set up with the VGA R2R to play with at the moment but I suspect it might be a very small bit faster with branchless code:
Code:
// Branch-free variant: sets one 4-bit pixel in the current framebuffer,
// leaving the other pixel of the shared byte untouched.
static void drawPixel(int16_t x, int16_t y, uint8_t fg) {
  uint8_t *target = s_frameBuffer[frameBufferIndex];

  // odd x lives in the upper nibble (shift by 4), even x in the lower (shift by 0)
  const unsigned int shift = (x & 1) << 2;
  // mask selecting the nibble that must survive
  const uint8_t keep = 0xf0 >> shift;

  uint8_t pair = getPixel(x/2,y); // current byte holding both pixels
  pair &= keep;
  pair |= fg << shift;
  target[(y*pitch)+(x/2)] = pair;
}
 
Been playing some more with this and have a quick question about setting up the timing for 1024x600x60. How did you setup the existing modelines? I was checking it out online but could not associate any modeline calculator with the existing ones...
 
I think I took most of them from here: http://tinyvga.com/vga-timing

For 1024x600 you can take one of the existing 800x600 modes and scale the clock+horizontal timing values by 1024/800 while ensuring the new values stay evenly divisible by 8, like so:

Code:
// 1024x600 derived from the 800x600 @ 60 Hz mode: vertical values unchanged,
// horizontal values and clock scaled by 1024/800 and rounded to multiples of 8.
// 1352 pixels per line, 628 lines per frame.
PROGMEM static const vga_timing t1024x600x60 = {
  .height=600, .vfp=1, .vsw=4, .vbp=23,
  .width=1024, .hfp=56, .hsw=160, .hbp=112,
  .clk_num=32, .clk_den=15, .vsync_pol=0, .hsync_pol=0
};
Completely untested so no guarantees, the clock is fudged making it run slightly faster than 60fps. If the image appears shifted left or right you can try juggling multiples of 8 between hfp and hbp to adjust it.
 
I think I took most of them from here: http://tinyvga.com/vga-timing

For 1024x600 you can take one of the existing 800x600 modes and scale the clock+horizontal timing values by 1024/800 while ensuring the new values stay evenly divisible by 8, like so:

Code:
// 1024x600 derived from the 800x600 @ 60 Hz mode: vertical values unchanged,
// horizontal values and clock scaled by 1024/800 and rounded to multiples of 8.
// 1352 pixels per line, 628 lines per frame.
PROGMEM static const vga_timing t1024x600x60 = {
  .height=600, .vfp=1, .vsw=4, .vbp=23,
  .width=1024, .hfp=56, .hsw=160, .hbp=112,
  .clk_num=32, .clk_den=15, .vsync_pol=0, .hsync_pol=0
};
Completely untested so no guarantees, the clock is fudged making it run slightly faster than 60fps. If the image appears shifted left or right you can try juggling multiples of 8 between hfp and hbp to adjust it.

Thanks for the info - I will give it a try and let you know how it works. I now have pretty much all of uVGA adapted and working. Lots left to do though:)

EDIT: 1024x600x60 works...
 
Last edited:
@jmarsh - I have been playing with FlexIO2VGA off and on for the last few months now. It has been a lot of fun:)

I thought you might be interested in what I have come up with so far. I created a library called VGA_4BIT_T4 on Github:
https://github.com/wwatson4506/VGA_4BIT_T4/tree/main

I based most of the graphic primitives and text function from uVGA:
https://github.com/qix67/uVGA

My goal was to optimize memory usage and used just a single frame buffer. I still am using a line by line scroll and it is fairly slow at the higher resolutions. I had a working scroll routine for the RA8875 which uses a circular frame buffer. Can't find it right now. Still looking:(

I have created a few example sketches for the library:

VGA_T4_Editor.ino - Is the most ambitious one. It is an adapted version of the Kilo editor and uses a VT100 driver supported in the VGA_4BIT_T4 library. It requires a USB host keyboard and if using an optional USB host drive a USB hub will be needed.

VGA_T4_GraphicCursor.ino - Is an example sketch of a software driven graphic cursor. It requires a USB host mouse.

I also created a 8x16 text cursor that supports various sizes of a block cursor. It can be a variable rate blinking cursor as well.

The only real issue at this point is that the "fillEllipse()" function fails with certain parameters. Example: "fillEllipse(516, 419,59,61,VGA_BRIGHT_WHITE)".
It seems the fill goes out of bounds of the ellipse at the top and bottom quarter of the ellipse? This can be seen in the VGA_T4_Graphics.ino sketch.

Anyway if you have the hardware still setup I thought you might want to check it out...
 
@wwatson - nice work [and @jmarsh] - haven't played with Teensy VGA since it was working on T_3.6 - was that with the @Frank_B board?
 
@wwatson - nice work [and @jmarsh] - haven't played with Teensy VGA since it was working on T_3.6 - was that with the @Frank_B board?

Thanks and no it was not.
I bread boarded the R2R ladder and VGA connector. I don't think I know about FrankB's board. The schematic is shown above and at the beginning of the VGA_4bit_T4.cpp file. Most of the other VGA drivers use 8 bit R2R ladders, which of course use more memory. The 4 bit version uses less memory as you are packing two pixels into one byte for a total of 16 colors per pixel. Packing and unpacking bytes with two nibbles (pixels) was challenging to say the least. I am wanting to design a 4 bit VGA adapter board but have no experience with that. Guess it's time to learn how:)

There are several things I want to add like graphic font library etc...
But I still have a lot of optimizing and bug squashing to do first:)
 
Looking at the color lines of the circuit with fresh eyes, I think it could be simplified to get rid of four resistors?
Code:
/*
 * INTENSITY (13) <-- 536R --\
 *                           |
 * GROUND <------ 536R ------*---- 270R ------------*---> VGA PIN: R=1
 *                           |                      |
 *                           |  RED (11) <--- 536R -/
 *                           |
 *                           *---- 270R ------------*---> VGA PIN: G=2
 *                           |                      |
 *                           |  GREEN (12) <- 536R -/
 *                           |
 *                           *---- 270R ------------*---> VGA PIN: B=3
 *                                                  |
 *                              BLUE (10) <-- 536R -/
 *
 */

I might have a go at implementing scrolling just by adjusting the DMA source... two possible ideas, either aligning the framebuffer to a power of 2 address and using DMA wrapping or splitting it into 2 transfers, one for the "top" portion of the screen and another for the "low" portion (split where it wraps from the bottom of the allocated framebuffer memory to the top). In theory it could also be used to scroll left and right.
 
So that idea (trimming the circuit) does not completely work... there's a little bit of bleed between the channels and the intensity alone isn't as bright as it should be. But it also made me double-check the resistance values, and the ones we're using (270R/536R) aren't right either for 75R terminated VGA; it means the intensity signal is one quarter of the maximum output (0.7V), and the RGB signals are one half - meaning our maximum output is only three quarters (525mV) of the allowed maximum.

If we use 180R/390R instead, we get:
- 237 millivolts when intensity alone is active
- 456 millivolts when R/G/B alone is active
- 693 millivolts when both are active. That's a lot closer to the maximum value of 0.7V.
So I'm going to pick up some packs of those resistors to build a new ladder before I get back into this.
 
Back
Top