During the T4 beta I did some playing around with the Flex IO sub-system that is part of the T4...
With it I have versions of a SPI handler and a Serial port handler.

The code is up on github: https://github.com/KurtE/FlexIO_t4

At some point it might be nice to merge some of this support code into the T4 core files or a standard library (probably enhancing it along the way), and potentially to use this support as the basis for a standard library like SoftwareSerial.

But after having a discussion this morning up on another thread about having an array of HardwareSerial objects and pros and cons versus array of Streams, I thought I would hack up a simple sketch, which tries to output a buffer of N bytes out to a lot of IO pins, like all of the SerialX objects on T4 as well as several ones built up from Flex IO...

I ran into some bugs in my library code. Some of which I have fixed, some I am still debugging.

Right now I am having an issue that has something to do with using either pin 18 or pin 22 in the list: even when I remove them from the set of ports being output to, just creating and initializing the FlexSerial object gives me some strange output on that pin... Again, still debugging.

But if anyone is curious about this, here is a quick and dirty sketch that tries to output a buffer (currently defined as 128 bytes long) as quickly as possible to what is logically 13 or 14 serial ports on the T4...

Code:
#include <FlexIO_t4.h>
#include <FlexSerial.h>

// FlexIO-backed serial ports. Each is constructed with -1 as the first argument and the
// TX pin as the second (NOTE(review): -1 presumably means "no RX pin" - confirm in FlexSerial.h).
FlexSerial SerialF1(-1, 2); // currently setup for pin 2 TX
FlexSerial SerialF2(-1, 3); // currently setup for pin 3 TX
FlexSerial SerialF3(-1, 4); // currently setup for pin 4 TX
FlexSerial SerialF4(-1, 5); // currently setup for pin 5 TX
FlexSerial SerialF5(-1, 6); // currently setup for pin 6 TX
FlexSerial SerialF6(-1, 18); // currently setup for pin 18 TX
FlexSerial SerialF7(-1, 22); // currently setup for pin 22 TX

// Every port that loop() writes the test buffer to: the seven hardware UARTs followed by
// the FlexIO ports. SerialF7 is constructed above but is currently commented out of this list.
Stream *streams[] = {&Serial1, &Serial2, &Serial3, &Serial4, &Serial5, &Serial6, &Serial7,
                     &SerialF1, &SerialF2, &SerialF3, &SerialF4, &SerialF5, &SerialF6/*, &SerialF7 */
                    };
// Number of entries in streams[].
const uint8_t count_streams = sizeof(streams) / sizeof(streams[0]);

#define BUFFER_SIZE 128 // size in bytes of the test buffer sent to every stream
#define BAUD 115200     // baud rate passed to begin() for the SerialN and SerialFn ports
uint8_t buffer[BUFFER_SIZE]; // test pattern, refilled at the start of each loop()
uint8_t loop_count = 0;      // incremented once per loop(); 8-bit, so it wraps after 255


void setup() {
  pinMode(13, OUTPUT);
  pinMode(12, OUTPUT);
  while (!Serial && millis() < 4000);
  Serial.begin(115200);
  delay(250);

  // Lets initialize all of the Hardware Serial ports.
  Serial1.begin(BAUD); // 0, 1
  Serial2.begin(BAUD); // 7, 8
  Serial3.begin(BAUD); // 15, 14
  Serial4.begin(BAUD); // 16, 17
  Serial5.begin(BAUD); // 21, 20
  Serial6.begin(BAUD); // 25, 24
  Serial7.begin(BAUD); // 28, 29
  Serial.println("\nSerialF1 begin");if (!SerialF1.begin(BAUD)) Serial.println("SerialF1 begin failed\n");
  Serial.println("\nSerialF2 begin");if (!SerialF2.begin(BAUD)) Serial.println("SerialF2 begin failed\n");
  Serial.println("\nSerialF3 begin");if (!SerialF3.begin(BAUD)) Serial.println("SerialF3 begin failed\n");
  Serial.println("\nSerialF4 begin");if (!SerialF4.begin(BAUD)) Serial.println("SerialF4 begin failed\n");
  Serial.println("\nSerialF5 begin");if (!SerialF5.begin(BAUD)) Serial.println("SerialF5 begin failed\n");
  Serial.println("\nSerialF6 begin");if (!SerialF6.begin(BAUD)) Serial.println("SerialF6 begin failed\n");
  Serial.println("\nSerialF7 begin");if (!SerialF7.begin(BAUD)) Serial.println("SerialF7 begin failed\n");
  delay(500);

  Serial.println("End Setup");
}

uint8_t loop_char = 'a';
void loop() {
  loop_count++;
  // maybe first 10 bytes will bee loop count;
  memset(buffer, loop_count, 10);
  for (uint16_t bi = 10; bi < sizeof(buffer); bi++) buffer[bi] = bi & 0xff;
  digitalWriteFast(13, HIGH);
  uint16_t buffer_indexes[count_streams] = {0};
  uint32_t start_time = millis();
  bool not_done_yet = true;
  while (not_done_yet) {
    digitalWriteFast(12, HIGH);
    not_done_yet = false; // assume we are done.
    for (uint8_t serial_index = 0; serial_index < count_streams; serial_index++) {
      if (buffer_indexes[serial_index] < sizeof(buffer)) {
        uint16_t cbOutput = sizeof(buffer) - buffer_indexes[serial_index];
        uint16_t cbAvailForWrite = streams[serial_index]->availableForWrite();
        if (cbAvailForWrite < cbOutput) {
          cbOutput = cbAvailForWrite;
          not_done_yet = true;
        }
        streams[serial_index]->write(&buffer[buffer_indexes[serial_index]], cbOutput);
        buffer_indexes[serial_index] += cbOutput;
      }
    }
    delayMicroseconds(10);
    digitalWriteFast(12, LOW);
  }
  // Now lets wait until all of them finish their output
  for (uint8_t serial_index = 0; serial_index < count_streams; serial_index++) {
    streams[serial_index]->flush();
  }
  digitalWriteFast(13, LOW);
  Serial.printf("loop %u time: %u\n", loop_count, millis() - start_time);
  delay(250);
}
For this, I brought out my older Saleae Logic 16 analyzer to show the channels. Channel 10 is the one that is screwing up: I am not outputting to it, but it still appears to output a start/stop... Investigating. The last channel shown is pin 13, which shows the full time it took to output on all of the channels. The one above it is pin 12, which shows the individual passes through the loop that checks how much data can be output to each channel. Note that I put in the delay of 10 microseconds to see if I was hanging before or after the output of the last channel...
Click image for larger version. 

Name:	screenshot.jpg 
Views:	54 
Size:	105.7 KB 
ID:	18081

Anyway I thought I would throw this out here as a way to prod myself to look at it again