Teensy 4.1 UART Communication: How to use Hardware Serial?

samm_flynn

Active member
EDIT : There was a bug in my previous code, I found just after posting. Editing the question since I have a new question.

Hey everyone,

I'm working on a project involving two Teensy 4.1 boards, where I need to send structured binary data to the master over USB serial, then from the master to the slave over hardware UART (Serial1 → Serial8). The setup works flawlessly when I send data to the slave Teensy over USB serial: I can send 512 KB at almost 20 MB/s, sometimes faster.

But when I switch over to hardware serial I don't get the same performance. I haven't measured it, but the delay is an order of magnitude higher.

I was wondering whether this is because of the way I coded it, or whether hardware serial is significantly slower by design.

Master Teensy Code -
C++:
// Parser state placeholders. Neither variable is referenced by the code
// visible in this sketch.
int packetIndex = 0;
bool inPacket = false;

// Serial port initialization
// Opens the USB serial port to the PC, waits for it to come up, then opens
// the UART (Serial1) that carries packets to the slave. Both use 921600 baud.
void setup()
{
  constexpr uint32_t kLinkBaud = 921600;

  Serial.begin(kLinkBaud);
  // Spin in 1 ms steps until `Serial` evaluates true.
  while (!Serial)
  {
    delay(1);
  }

  Serial1.begin(kLinkBaud);
}

void loop()
{
  if (Serial.available() >= 8)
  {
    byte startByte = Serial.read();
    byte commandByte = Serial.read();
    byte axisId = Serial.read();
    byte headerByte = Serial.read();

    uint32_t payloadSize;
    Serial.readBytes(reinterpret_cast<char *>(&payloadSize), sizeof(payloadSize));
    uint8_t payloadBuffer[payloadSize];
    Serial.readBytes(reinterpret_cast<char *>(payloadBuffer), payloadSize);  // Read payload
    // Read the end byte
    byte endByte = Serial.read();
    Serial.println("\n--- Sending Packet Packet ---");
    Serial.printf("Start Byte: 0x%02X\n", startByte);
    Serial.printf("Command Type: 0x%02X\n", commandByte);
    Serial.printf("Axis ID: %d\n", axisId);
    Serial.printf("Header Byte: 0x%02X\n", headerByte);
    Serial.printf("Payload Size: %u \n", payloadSize);
    Serial.printf("End Byte: 0x%02X\n", endByte);
    Serial.println("-----------------------\n");
    // Allocate buffer dynamically for the full packet
    uint32_t totalPacketSize = 8 + payloadSize + 1;  // Start + command + axisId + header + size(4) + payload + end
    uint8_t fullPacket[totalPacketSize];

    int index = 0;
    fullPacket[index++] = startByte;
    fullPacket[index++] = commandByte;
    fullPacket[index++] = axisId;
    fullPacket[index++] = headerByte;

    fullPacket[index++] = (payloadSize >> 0) & 0xFF;
    fullPacket[index++] = (payloadSize >> 8) & 0xFF;
    fullPacket[index++] = (payloadSize >> 16) & 0xFF;
    fullPacket[index++] = (payloadSize >> 24) & 0xFF;

    for (uint32_t i = 0; i < payloadSize; i++)
    {
      fullPacket[index++] = payloadBuffer[i];
    }

    fullPacket[index++] = endByte;



    Serial.printf("packetSize:%d, Axis:%d\n", totalPacketSize, axisId);
    Serial.print("Sending : ");
    for (uint32_t i = 0; i < totalPacketSize; i++)
    {
      Serial.printf("0x%02X ", fullPacket[i]);
    }
    Serial.println();
    if (axisId == 0)
    {
      int bytesWritten = Serial1.write(fullPacket, totalPacketSize);
      Serial.printf("w:%d\n", bytesWritten);
    }


  }
}





// Copies every byte currently pending on Serial1 to the USB serial port.
void serialEvent1()
{
  for (;;)
  {
    if (Serial1.available() <= 0)
    {
      break;
    }
    const int incoming = Serial1.read();
    Serial.write(incoming);
  }
}

Slave Teensy Code -
C++:
#define N 63488  // Maximum number of trajectory samples (floats) per buffer
#define IRQ_LINE 5       // Pin configured as INPUT in setup()
#define ENABLE_LINE 32   // Pin configured as INPUT in setup()
#define ESTOP 33         // Not referenced by the code visible in this sketch
#include <math.h>
#include "motor_axis.h"
// #include "utils.h"
#include <IntervalTimer.h>
// Timer object; its begin() call in setup() is currently commented out.
IntervalTimer motor_timer;

// Trajectory storage for motor 1, placed with DMAMEM. The union lets the same
// bytes be filled from the serial stream (rawBytes) and read back as float
// setpoints (thetas).
DMAMEM union
{
  float thetas[N];
  uint8_t rawBytes[sizeof(float) * N];
} thetaBuffer1;

// Trajectory storage for motor 2; same layout as thetaBuffer1.
DMAMEM union
{
  float thetas[N];
  uint8_t rawBytes[sizeof(float) * N];
} thetaBuffer2;
// Function prototypes
// handlePayload takes the axis ID as a third argument, matching both its
// definition and the call in processPacket().
void processPacket();
void handlePayload(byte headerByte, int payloadSize, byte axisId);
template<typename T> T readPayload(int payloadSize);

// Values passed as the second MotorAxis constructor argument
// (presumably proximity-sensor interrupt pins — confirm against motor_axis.h).
const int PROX_INT1 = 38;  // or 2
const int PROX_INT2 = 2;   // Different proximity interrupt for motor2

const int8_t LED_PIN = 13;
const unsigned long CAN_ID1 = 0x01;  // or 1
// NOTE(review): same value as CAN_ID1. motor2 is instantiated on CAN2 below,
// so a shared ID may be intentional — confirm.
const unsigned long CAN_ID2 = 0x01;

// Direction values passed as the third MotorAxis constructor argument.
const int dir1 = -1;
const int dir2 = 1;   // Opposite direction for second motor

// One axis per CAN controller: motor1 on CAN1, motor2 on CAN2.
MotorAxis<FlexCAN_T4<CAN1, RX_SIZE_8, TX_SIZE_8>> motor1(CAN_ID1, PROX_INT1, dir1, 1);
MotorAxis<FlexCAN_T4<CAN2, RX_SIZE_8, TX_SIZE_8>> motor2(CAN_ID2, PROX_INT2, dir2, 2);

// Manual-mode setpoints; handlePayload() stores them after a degrees-to-radians conversion.
float position_setpoint1, position_setpoint2;
// Mode flags set by the Mode command (0x85); sendCmd() acts only when exactly one is true.
// NOTE(review): these and trajectory_length are not volatile. If sendCmd() is
// re-attached to a timer or interrupt, revisit how they are shared.
bool automatic = false, manual = false;
// Number of trajectory samples to play back; set by the TrajectoryLength command (0x88).
uint32_t trajectory_length = 0;
// Current playback index into thetaBuffer1/thetaBuffer2, advanced by sendCmd().
volatile int ArrayIndex;

// Setup function
// Opens both serial ports, reports any stored crash report, initialises the
// two motor axes in a disabled state, configures the sync/enable pins, and
// announces readiness on both ports.
void setup()
{
  Serial8.begin(921600);  // From Master Teensy
  Serial8.setTimeout(5000);  // readBytes() on this port waits up to 5 s

  Serial2.begin(921600);// CP2102
  Serial2.setTimeout(5000);


  delay(3500);

  // Print the crash report when CrashReport evaluates true.
  if (CrashReport)
    Serial8.println(CrashReport);

  // Initialize both motors: zero velocity, then leave them disabled.
  motor1.init();
  motor1.setVelocity(0);
  motor1.disable();

  motor2.init();
  motor2.setVelocity(0);
  motor2.disable();

  pinMode(IRQ_LINE, INPUT);     // IRQ Line
  pinMode(ENABLE_LINE, INPUT);  // ENABLE LINE
  // Both ways of triggering sendCmd() are currently disabled:
  // attachInterrupt(digitalPinToInterrupt(IRQ_LINE), sendCmd, RISING);
  // motor_timer.begin(sendCmd, 1000);
  Serial8.println("<READY>");
  Serial2.println("<READY>");
}


void sendCmd()
{
  // if (digitalReadFast(ENABLE_LINE))  // USE A Static Inline function defination here.
  if (true)  // USE A Static Inline function defination here.
  {
    if (automatic && !manual)  // USE A Static Inline function defination here.
    {
      ArrayIndex = (ArrayIndex + 1) % (trajectory_length);  // Wraps back to 0 when exceeding maxValue
      if (ArrayIndex % 100 == 0)
      {
        Serial8.print("Idx:");
        Serial8.println(ArrayIndex);
      }
    }
  }
  if (motor1.armed && motor1.calibrated)
  {
    if (manual && !automatic)
    {
      motor1.setPosition(position_setpoint1);
    }
    else if (automatic && !manual)
    {
      motor1.setPosition(thetaBuffer1.thetas[ArrayIndex]);
    }
  }

  if (motor2.armed && motor2.calibrated)
  {
    if (manual && !automatic)
    {
      motor2.setPosition(position_setpoint2);
    }
    else if (automatic && !manual)
    {
      motor2.setPosition(thetaBuffer2.thetas[ArrayIndex]);
    }
  }
}
// Main loop
// Hands over to processPacket() once at least 10 bytes are buffered on
// Serial8; otherwise returns immediately.
void loop()
{
  const int bufferedBytes = Serial8.available();
  if (bufferedBytes < 10)
  {
    return;
  }
  processPacket();
}

// Function to process an incoming Serial8 packet
// Parses one packet from Serial8 and echoes each field to Serial8 and Serial2.
//
// Frame layout:
//   start(1) command(1) axisId(1) header(1) payloadSize(4) payload(N) end(1)
//
// loop() only calls this once 10 bytes are buffered, so the 8 header bytes are
// available. The payload itself is consumed by handlePayload().
void processPacket()
{
  uint32_t payloadSize = 0;
  Serial8.println("\n--- Received Packet ---");
  Serial2.println("\n--- Received Packet ---");

  byte startByte = Serial8.read();
  Serial8.printf("Start Byte: 0x%02X\n", startByte);
  Serial2.printf("Start Byte: 0x%02X\n", startByte);

  byte commandByte = Serial8.read();
  Serial8.printf("Command Type: 0x%02X\n", commandByte);
  Serial2.printf("Command Type: 0x%02X\n", commandByte);

  byte axisId = Serial8.read();
  Serial8.printf("Axis ID: %d\n", axisId);
  Serial2.printf("Axis ID: %d\n", axisId);

  byte headerByte = Serial8.read();
  Serial8.printf("Header Byte: 0x%02X\n", headerByte);
  Serial2.printf("Header Byte: 0x%02X\n", headerByte);

  Serial8.readBytes(reinterpret_cast<char *>(&payloadSize), sizeof(payloadSize));
  Serial8.printf("Payload Size: %u \n", payloadSize);
  Serial2.printf("Payload Size: %u \n", payloadSize);

  handlePayload(headerByte, payloadSize, axisId);

  // The end byte may still be in flight when the payload read returns. A bare
  // read() would then yield -1 (printed as 0xFF) and leave the real end byte
  // in the buffer to be parsed as the next packet's start byte. readBytes()
  // waits for it, up to the stream timeout.
  byte endByte = 0;
  if (Serial8.readBytes(reinterpret_cast<char *>(&endByte), 1) == 1)
  {
    Serial8.printf("End Byte: 0x%02X\n", endByte);
    Serial2.printf("End Byte: 0x%02X\n", endByte);
  }
  else
  {
    Serial8.println("Error: End Byte Missing");
    Serial2.println("Error: End Byte Missing");
  }
  Serial8.println("-----------------------\n");
  Serial2.println("-----------------------\n");
}

// Handles different payload commands based on the header byte
void handlePayload(byte headerByte, int payloadSize, byte axisId)
{
  String commandType;
  switch (headerByte)
  {
    case 0x87:  // Trajectory
      {
        if (axisId == 0)
        {
          // Allocate temporary buffer in stack memory
          uint8_t tempBuffer[payloadSize];

          // Read data once into tempBuffer
          if (Serial8.readBytes(reinterpret_cast<char *>(tempBuffer), payloadSize) != static_cast<size_t>(payloadSize))
          {
            Serial8.println("Error: Array Size Mismatch");
          }
          else
          {
            // Copy the same data into both thetaBuffer1 and thetaBuffer2
            memcpy(thetaBuffer1.rawBytes, tempBuffer, payloadSize);
            memcpy(thetaBuffer2.rawBytes, tempBuffer, payloadSize);
          }
        }
        else if (axisId == 1)
        {
          if (Serial8.readBytes(reinterpret_cast<char *>(thetaBuffer1.rawBytes), payloadSize) != static_cast<size_t>(payloadSize))
            Serial8.println("Error: Array Size Mismatch");
        }
        else if (axisId == 2)
        {
          if (Serial8.readBytes(reinterpret_cast<char *>(thetaBuffer2.rawBytes), payloadSize) != static_cast<size_t>(payloadSize))
            Serial8.println("Error: Array Size Mismatch");
        }

        commandType = "Trajectory(" + String(payloadSize) + ")";
        break;
      }

    case 0x86:  // setPosition
      {
        float val = readPayload<float>(payloadSize);
        commandType = "setPosition(" + String(val, 6) + ")";
        if (axisId == 0 || axisId == 1)
        {
          position_setpoint1 = val * M_PI / 180;
        }
        if (axisId == 0 || axisId == 2)
        {
          position_setpoint2 = val * M_PI / 180;
        }
        break;
      }

    case 0x88:  //TrajectoryLength
      {
        uint32_t trajLength = readPayload<uint32_t>(payloadSize);
        trajectory_length = trajLength;
        commandType = "TrajectoryLength(" + String(trajLength) + ")";
        break;
      }

    case 0x89:  //FeedRate
      {
        byte feedRate = readPayload<byte>(payloadSize);
        commandType = "FeedRate(" + String(feedRate) + ")";
        break;
      }

    case 0x80:  //Reboot
      {
        byte rebootVal = readPayload<byte>(payloadSize);
        commandType = "Reboot(" + String(rebootVal) + ")";
        break;
      }

    case 0x81:  //eStop
      {
        byte eStopVal = readPayload<byte>(payloadSize);
        commandType = "eStop(" + String(eStopVal) + ")";
        break;
      }

    case 0x82:  //Enable
      {
        byte enableVal = readPayload<byte>(payloadSize);
        commandType = "Enable(" + String(enableVal) + ")";
        if (axisId == 0 || axisId == 1)
          motor1.enable();
        if (axisId == 0 || axisId == 2)
          motor2.enable();
        break;
      }

    case 0x83:  //Disable
      {
        byte disableVal = readPayload<byte>(payloadSize);
        commandType = "Disable(" + String(disableVal) + ")";
        if (axisId == 0 || axisId == 1)
          motor1.disable();
        if (axisId == 0 || axisId == 2)
          motor2.disable();
        break;
      }

    case 0x84:  // Calibrate
      {
        byte calibrateVal = readPayload<byte>(payloadSize);
        commandType = "Calibrate(" + String(calibrateVal) + ")";
        if (axisId == 0 || axisId == 1)
          motor1.calibrate();
        if (axisId == 0 || axisId == 2)
          motor2.calibrate();
        break;
      }

    case 0x85:  //Mode
      {
        byte modeVal = readPayload<byte>(payloadSize);
        if (modeVal == 0)
        {
          manual = true;
          automatic = false;
        }
        else if (modeVal == 1)
        {
          automatic = true;
          manual = false;
        }
        commandType = "Mode(" + String(modeVal) + ")";
        break;
      }

    default:
      commandType = "Unknown Command";
      break;
  }

  Serial8.printf("axis ID : %d -> %s\n", axisId, commandType.c_str());
  Serial2.printf("axis ID : %d -> %s\n", axisId, commandType.c_str());
}

// Generic function to read payload data
template<typename T>
T readPayload(int payloadSize)
{
    T value;
    if (static_cast<size_t>(payloadSize) < sizeof(T))  // Cast payloadSize to avoid signed/unsigned mismatch
    {
        Serial1.println("Error: Payload size mismatch!");
        return 0;
    }
    Serial.readBytes(reinterpret_cast<char *>(&value), sizeof(T));
    return value;
}

// Dispatches a CAN message on its `bus` field: bus 1 goes to motor1, bus 2
// goes to motor2, and any other value is ignored.
void ext_output1(const CAN_message_t &msg)
{
  switch (msg.bus)
  {
    case 1:
      motor1.CanRxHandler(msg);
      break;
    case 2:
      motor2.CanRxHandler(msg);
      break;
    default:
      break;
  }
}
 
Last edited:
Hardware serial is literally an order or two of magnitude slower than USB serial. The USB serial link is limited only by the bus bandwidth. On USB 2.0 that's around 480 Mbit/s. You're unlikely to get quite 480 Mbit/s but, as you found, 20 megabytes per second is possible.

The hardware serial, on the other hand, will probably only work up to a couple of Mbit/s, and you're basically asking it for just a little shy of 1 Mbit/s. So, roughly 480x slower than the USB bus rate. The serial speed you're asking for is right around 90 kilobytes per second, so this is MUCH slower than the USB speed.

You will need to account for this speed difference.
 
Not only is hardware serial physically about 500 times slower, but it also has far more CPU overhead than USB.

The serial ports on Teensy do at least have a 4-byte FIFO. I believe we currently configure the interrupt watermark so half the FIFO reduces the interrupt rate and the other half protects against unexpected interrupt latency. But even if you used all 4 bytes for efficiency (a rather risky design if you run at the highest baud rates) you're still looking at an interrupt and CPU overhead every 4 bytes.

USB at 480 Mbit/sec uses 512-byte packets. It's been a while since I've worked on the low-level code, but I believe we use a group of 2048-byte buffers. But even if there's an interrupt for each packet, you're taking CPU time for an interrupt far less often. The USB hardware is also far more efficient, using bus master DMA to put the incoming data directly into the memory buffer.

Hardware serial can be used with the generic DMA controller, though it's not the more efficient bus master type. At least a couple of people have made it work and shared code on this forum. Usually this involves more specific usage, since generic DMA needs to know data sizes in advance. DMA can't solve the fact that hardware serial is still at best 100 times slower, but it can at least mitigate much of the excessive CPU overhead that occurs when you push hardware serial to its faster baud rates. If you're running all 8 serial ports concurrently at high baud rates, you'll probably need that sort of efficiency (to reliably achieve the ~100x-slower-than-USB capability).
 
Could Ethernet be a viable solution in such cases, where high-speed data transfer is required?

I mean, in terms of the underlying hardware, does Ethernet have more buffering?
4 bytes is unbelievably small!
 
Yes, the built-in Ethernet on Teensy 4.1 uses efficient bus master DMA and large buffers with the QNEthernet library. Many people have reported achieving nearly the full 100 Mbit bandwidth when communicating on a LAN. Speed over the internet, with its higher packet latency, varies and rarely comes close to the full theoretical hardware speed due to limited RAM on microcontrollers.
 
To add more details, in the QNEthernet library, there are 5 frame buffers for RX and TX, and each buffer is 1536 bytes. I haven’t done any tuning for that number (5).
 
This is why it's always wise to explain the context of your questions. If you just ask a narrowly focused question about speed, you'll (probably) miss out on answers like hardware serial simply not being meant for many-Mbit/sec data (on any board, not just Teensy). Better answers can really make the difference between starting your project in a way that's feasible versus wasting a lot of time only to learn later that the approach wasn't viable.
 
This is why it's always wise to explain the context of your questions. If you just ask a narrowly focused question about speed, you'll (probably) miss out on answers like hardware serial simply not being meant for many-Mbit/sec data (on any board, not just Teensy). Better answers can really make the difference between starting your project in a way that's feasible versus wasting a lot of time only to learn later that the approach wasn't viable.
|
Fair point, A bit of background for anyone coming across this post:

I have a robot with six motor controllers, and the entire control system is developed and tuned to operate at a sampling frequency of 1 kHz.

Due to the setup, I am using one Teensy per two axes, along with a master Teensy that sends a PWM synchronization signal at 1 kHz. The slave Teensies run the control algorithm based on this external pulse.

For communication, I need to send 6 floats (24 bytes) + 8 bytes of additional data, totaling 32 bytes per cycle at 1 kHz.
The feedback from the onboard sensors consists of 16 bytes per axis for 6 axes, totaling 96 bytes per cycle.

Since the robot is intended to run for prolonged periods, the Teensies cannot store all reference and feedback data in RAM. Therefore, I plan to implement a serial router subroutine on the master Teensy to handle data transfer between the PC and the slave Teensies for control and logging.

With three slave Teensies, the master needs to handle an aggregate 384 kB/s of data transfer.

For each slave Teensy, I need to receive data at 32 kB/s and send data back to the master at 96 kB/s, resulting in a total communication rate of 128 kB/s per slave, which is just over 1 Mbaud. I might need to send feedback averaged over every two samples instead. Or I might try out Ethernet and a switch.


OK, I have some work to do.
 
So, for every 1-ms control cycle, the master sends 36 bytes to each slave and receives 92 bytes. It's at least 10-bits per byte, so that is 1,280,000 bps. If the communication is half duplex, you'll need at least 2M baud. Are you just bread-boarding this for now, or actually building it? Will it be RS-232 or RS-485? I recommend using the SerialTransfer library. It will construct/deconstruct packets with CRC, with fairly low overhead, so you'll know for sure, at both ends, that good data has been received. It also has provision for a "Packet ID", so you can have different "data" packets and later you can add "configuration" packets, which you'll probably need. With serial communication, I always try to scale floats to 16-bit integers, which is almost always enough resolution for control. That could reduce your data rate by 1/3 or more.
 
So, for every 1-ms control cycle, the master sends 36 bytes to each slave and receives 92 bytes. It's at least 10-bits per byte, so that is 1,280,000 bps. If the communication is half duplex, you'll need at least 2M baud. Are you just bread-boarding this for now, or actually building it? Will it be RS-232 or RS-485?
I am using very short wires (<15 cm), with Teensy breakout boards and screw connectors to hold the wires in place. I intend to use RS-422 in the future, once everything is final.
I recommend using the SerialTransfer library. It will construct/deconstruct packets with CRC, with fairly low overhead, so you'll know for sure, at both ends, that good data has been received. It also has provision for a "Packet ID", so you can have different "data" packets and later you can add "configuration" packets, which you'll probably need. With serial communication
Considering SerialTransfer , I might mod it a little to use crc32 for very large packets I intend to send.
I always try to scale floats to 16-bit integers, which is almost always enough resolution for control. That could reduce your data rate by 1/3 or more.
My supervisor mentioned that as well, might end up doing that.
 
I just realized I can't use Ethernet, because the Teensy 4.1 boards I have don't have the Ethernet chip soldered (the NE version, I believe).
Can I use my master teensy as USB host as a drop in replacement in that case?
just to give a picture, this is what my working master teensy code looks like -
C++:
#define CHUNK_SIZE 128  // Bytes handed to the UART per write() call in routePacket()
#define N 60000         // Not referenced by the code visible in this sketch
// Holds one whole packet: 8 header bytes + payload + 1 end byte.
// 240000 equals 4 * N (presumably N 4-byte floats — confirm).
byte packetBuffer[240000 + 9];  // Main buffer for packet storage
// Reads one packet from the USB serial port into packetBuffer and routes it to
// the UART serving its axis.
//
// Frame layout:
//   startByte(1), write/read(1), axis_id(1), header(1), payload_size(4), payload(N), endByte(1)
//
// The payload size comes off the wire, so it is checked against the buffer
// capacity before anything is written; an oversized value would otherwise run
// past the end of packetBuffer. Incomplete packets are dropped, not routed.
void processIncomingData(){
  constexpr uint32_t kHeaderSize = 8;
  constexpr uint32_t kFramingSize = kHeaderSize + 1;  // header + end byte

  if (Serial.available() < 8){
    return;
  }

  // The 8 header bytes are already buffered, so this read cannot come up short.
  Serial.readBytes(reinterpret_cast<char*>(packetBuffer), kHeaderSize);
  uint32_t payloadSize = 0;
  memcpy(&payloadSize, &packetBuffer[4], sizeof(payloadSize));

  if (payloadSize > sizeof(packetBuffer) - kFramingSize){
    Serial.printf("Error: payload of %u bytes exceeds buffer\n", static_cast<unsigned>(payloadSize));
    // Best effort: drop what is already buffered so stale payload bytes are
    // not parsed as the next header.
    while (Serial.available() > 0){
      Serial.read();
    }
    return;
  }

  // Payload and end byte are contiguous on the wire and in the buffer.
  const size_t bodySize = static_cast<size_t>(payloadSize) + 1;
  if (Serial.readBytes(reinterpret_cast<char*>(packetBuffer + kHeaderSize), bodySize) != bodySize){
    Serial.println("Error: packet incomplete");
    return;
  }

  routePacket(packetBuffer[2], packetBuffer, kFramingSize + payloadSize);
}
// Sends a complete packet to the UART that serves the given axis:
//   axis 1-2 -> Serial1, axis 3-4 -> Serial2, axis 5-6 -> Serial3.
// Any other axis ID is ignored.
//
// The packet is written in CHUNK_SIZE-byte pieces. Two alternatives were tried
// and left disabled in the original: pacing writes by availableForWrite(),
// and a single write() of the whole packet.
void routePacket(byte axisID, uint8_t* fullPacket, uint32_t packetSize)
{
  HardwareSerial* targetSerial = nullptr;
  if (axisID == 1 || axisID == 2) {
    targetSerial = &Serial1;
  } else if (axisID == 3 || axisID == 4) {
    targetSerial = &Serial2;
  } else if (axisID == 5 || axisID == 6) {
    targetSerial = &Serial3;
  }

  // Invalid axisID: nothing to send.
  if (targetSerial == nullptr) {
    return;
  }

  uint32_t offset = 0;
  while (offset < packetSize) {
    const uint32_t bytesLeft = packetSize - offset;
    const uint32_t chunkLength = (bytesLeft < CHUNK_SIZE) ? bytesLeft : CHUNK_SIZE;
    targetSerial->write(fullPacket + offset, chunkLength);
    offset += CHUNK_SIZE;
  }
}

// Configures the sync/enable output pins, waits for the USB serial port,
// prints any stored crash report, opens the three slave UARTs at 1.5 Mbaud,
// and announces readiness to the PC.
void setup(){
  pinMode(IRQ_PIN, OUTPUT);
  pinMode(ENABLE_PIN, OUTPUT);

  // Spin in 1 ms steps until `Serial` evaluates true.
  while (!Serial) {
    delay(1);
  }

  if (CrashReport) Serial.println(CrashReport);

  constexpr uint32_t kSlaveBaud = 1500000;
  Serial1.begin(kSlaveBaud);
  Serial2.begin(kSlaveBaud);
  Serial3.begin(kSlaveBaud);

  Serial.println("<MASTER READY>");
}
// Intentionally empty: this sketch does all of its work in the serialEvent*()
// handlers.
void loop(){
}
// serialEvent hook for the USB serial port: hands control to
// processIncomingData(), which reads and routes one packet.
void serialEvent(){
  processIncomingData();
}
void serialEvent1(){
  if (Serial1.available()) 
    Serial.printf("<<%s\n", Serial1.readStringUntil('\n').c_str());
}
void serialEvent2(){
  if (Serial2.available())
  Serial.printf("<<%s\n", Serial2.readStringUntil('\n').c_str());
}
void serialEvent3(){
  if (Serial3.available())
    Serial.printf("<<%s\n", Serial3.readStringUntil('\n').c_str());
}
 
Falling back to my default question when someone is having trouble implementing something tricky. Are you sure you need this? Why 1 kHz? How fast is the robot moving that you need 1kHz control and feedback to and from each node?
Very little physical motion requires control at that frequency, things simply don't move that fast. If it's for some form of timing critical control loop then it would make more sense (and probably be more reliable) to run that control loop locally and only send parameters/target locations at a far slower rate.

If it's purely for time sync reasons you can do that with a digital signal rather than with data. It's faster, more accurate and simpler.

Assuming you do need that much data how different are the values each time? Could you get away with only sending 32 absolute values some of the time and send 16 or 8 bits of delta the rest of the time.
Also keep in mind that while convenient you don't need to stick to 32 or 16 bit values. You can always send 24 bit values if that's all you need, it doesn't even need to be a multiple of 8 for each value, e.g. you could send 3 10 bit values in 4 bytes if needed. As long as you are consistent in how you pack and unpack the data you can pack the bits any way you want. Packing values like this is not normally worth the added software hassle but if you are data bandwidth limited then it's worth looking at.
 
Hi @AndyA, thanks for taking the time to read my question.
Falling back to my default question when someone is having trouble implementing something tricky. Are you sure you need this?
The need for 1 kHz control is actually beyond my control. If it were up to me, I would have stuck with something like 50 Hz. The reason for 1 kHz control is that the trajectory my robot (a Stewart platform) needs to follow involves impact-like events—that's the main goal of the project right now: simulating impacts. A lot of things influenced why I chose a 1 kHz sampling frequency.

Why 1 kHz? How fast is the robot moving that you need 1kHz control and feedback to and from each node?
Very little physical motion requires control at that frequency, things simply don't move that fast.
A controller sampling rate of 1 kHz enables me to do position control at 200 Hz; the main goal of my project is to simulate impacts. The higher the controller bandwidth, the sharper the impacts I can make.
If it's for some form of timing critical control loop then it would make more sense (and probably be more reliable) to run that control loop locally and only send parameters/target locations at a far slower rate.

If it's purely for time sync reasons you can do that with a digital signal rather than with data. It's faster, more accurate and simpler.
For syncing different axes, I'm already using several digital signals. For example, the master Teensy generates a 2 kHz square wave, and the three slave Teensys detect the rising edge and only send a position command at that moment. There’s another pulse line that controls the rate at which data is read from the ring buffer, along with a catch-up line (open collector) that can override the pulse line. Also, there are standard lines for enable, emergency stop, etc.

The trajectories I need to run are very long, so they can’t fit in the Teensy's memory. My plan is to stream the data from the PC to the Teensys while using a ring buffer on the slave Teensys.
Assuming you do need that much data how different are the values each time? Could you get away with only sending 32 absolute values some of the time and send 16 or 8 bits of delta the rest of the time.
Considering this, but my motor drivers take in absolute values. so gotta do this in software.

Also keep in mind that while convenient you don't need to stick to 32 or 16 bit values. You can always send 24 bit values if that's all you need, it doesn't even need to be a multiple of 8 for each value, e.g. you could send 3 10 bit values in 4 bytes if needed. As long as you are consistent in how you pack and unpack the data you can pack the bits any way you want. Packing values like this is not normally worth the added software hassle but if you are data bandwidth limited then it's worth looking at.

For the feedback, I am planning to do exactly what you said; I might use 16-bit values.
 
You can always add 8 / 16 MB of extra RAM by adding a PSRAM chip or two. That allows you to buffer a lot more data on the board either before or after. That's still only a few seconds' worth at your rates, so it may not be enough for everything, but it means you only have to worry about average data rate rather than peak.

Could you use a variable rate? Lower update rate for the initial operation and then higher during the collision? In that situation the higher rate during the collision could be buffered and then offloaded at a lower speed after the event.

CAN-FD offers a higher data rate than the uart but will require extra drivers. Since CAN is designed to be a common bus between all nodes and is broadcast based this may be a good option if you need to send the same data to all the nodes but probably not a good choice if each node needs to send it's own unique data back to the master node. Running independent busses to each device isn't an option since there is only 1 FD interface rather than 8 hardware uarts.

How about a daisy chain of USB serial ports? Each teensy has a USB device and a USB host port. The host port supports serial devices. So can you run USB the whole way? That would give you far more bandwidth than physical uarts.

edit - sorry, that ended up being a bit of a random thoughts collection didn't it.
 
You can always add 8 / 16 MB of extra RAM by adding a PSRAM chip or two. That allows you to buffer a lot more data on the board either before or after. That's still only a few seconds' worth at your rates, so it may not be enough for everything, but it means you only have to worry about average data rate rather than peak.
Agreed, already ordered the RAM chips.
Could you use a variable rate? Lower update rate for the initial operation and then higher during the collision? In that situation the higher rate during the collision could be buffered and then offloaded at a lower speed after the event.
By design, the system runs at constant rate. and the impacts are continuous as well, they contain high frequency content, but repeat at 3 Hz.

CAN-FD offers a higher data rate than the uart but will require extra drivers. Since CAN is designed to be a common bus between all nodes and is broadcast based this may be a good option if you need to send the same data to all the nodes but probably not a good choice if each node needs to send it's own unique data back to the master node. Running independent busses to each device isn't an option since there is only 1 FD interface rather than 8 hardware uarts.
For the reason mentioned, CAN is not an option in my opinion as well.
How about a daisy chain of USB serial ports? Each teensy has a USB device and a USB host port. The host port supports serial devices. So can you run USB the whole way? That would give you far more bandwidth than physical uarts.
This seems like the only viable solution at the moment. I just tried the USBHost_t36 example with a pair of spare Teensy 4.1 boards, and it seems promising.

Even if I add three of them — the master Teensy connected to 3 slave Teensys — I don't know how the USB bus splits the speed between the nodes, but the maximum of 480 Mbit/s works out to 160 Mbit/s per slave, or 80 Mbit/s one way, which should be more than enough to run things at full speed.

I just need to find out, how to connect 3 teensy to one master teensy as a usb serial device, and do I need a hub? I will play with the USBHost_t36 library.

edit - sorry, that ended up being a bit of a random thoughts collection didn't it.
I am beyond grateful to you for taking the time to even read my post; much appreciated.
 
@samm_flynn Have tried the USB_Host Serial some - not sure and don't recall how extensively.

For 'normal use' (or a test to be performed) use the 'master' would attach USB_Host to a powered USB Hub.
Any desired 'devices' would plug the device USB port into that Hub.
The devices would see the master HOST for data transfer when it connects to them
 
@samm_flynn Have tried the USB_Host Serial some - not sure and don't recall how extensively.

For 'normal use' (or a test to be performed) use the 'master' would attach USB_Host to a powered USB Hub.
Any desired 'devices' would plug the device USB port into that Hub.
The devices would see the master HOST for data transfer when it connects to them
I have run the examples and will share a version of the serial router soon. The difference will be that, instead of hardware serial, it will use USBHost_t36 to route packets. Ethernet was another attractive option, for my use case at least, but unfortunately I bought 12 Teensy 4.1 boards without the chip.
 
I have run the examples and will share a version of the serial router soon. The difference will be that, instead of hardware serial, it will use USBHost_t36 to route packets. Ethernet was another attractive option, for my use case at least, but unfortunately I bought 12 Teensy 4.1 boards without the chip.
C++:
#include <USBHost_t36.h>
// Holds one whole packet: 8 header bytes + payload + 1 end byte.
byte packetBuffer[240009];

// USB host controller object; serviced from loop() via myusb.Task().
USBHost myusb;

// Hub driver instance registered with the host.
USBHub hub1(myusb);

// USB serial drivers for the slaves; a third one is currently disabled.
USBSerial_BigBuffer slaveSerial1(myusb, 1);
USBSerial_BigBuffer slaveSerial2(myusb, 2);
// USBSerial_BigBuffer slaveSerial3(myusb, 3);
// Global buffers for accumulating partial lines, one per slave port.
// buffer3 is only used by the disabled third port.
String buffer1 = "";
String buffer2 = "";
String buffer3 = "";

// Waits for the USB serial port to the PC, prints any stored crash report,
// starts the USB host, opens the two slave serial drivers, and announces
// readiness.
void setup()
{
  // Spin in 1 ms steps until `Serial` evaluates true.
  while (!Serial)
  {
    delay(1);
  }

  if (CrashReport) Serial.println(CrashReport);

  constexpr uint32_t kRequestedBaud = 480000000;
  myusb.begin();
  slaveSerial1.begin(kRequestedBaud);
  slaveSerial2.begin(kRequestedBaud);
  // slaveSerial3.begin(480000000);  // third slave port currently disabled
  Serial.println("<ROUTER READY>");
}

// Main loop: service the USB host stack, then poll each slave port for
// status text and relay at most one complete line per port per iteration.
void loop()
{
  // Must run regularly so the host stack can process USB events.
  myusb.Task();
  // The string literal is the label printed in front of each relayed line.
  processSerial(slaveSerial1, buffer1, "1");
  processSerial(slaveSerial2, buffer2, "2");
  // processSerial(slaveSerial3, buffer3, "3");
}
// Relay text from one slave port to the host, one line at a time.
//
// Reads at most 64 pending bytes from `serial` into `buffer`, then, if
// `buffer` contains a complete '\n'-terminated line, prints that single line
// to the host as "<< <portLabel>: <line>" and removes it from `buffer`.
// Only one line is emitted per call so a chatty slave cannot starve the
// other ports serviced from loop().
//
// serial     - slave port to drain.
// buffer     - per-port accumulator that persists between calls.
// portLabel  - label printed in front of every relayed line.
//
// TODO : look at slave code determine max chars and get rid of String, even
// better if fixed sized status updates, maybe space padding or similar.
void processSerial(USBSerial_BigBuffer& serial, String& buffer, const char* portLabel)
{
  const size_t chunkSize = 64;
  // Upper bound on text held without a newline. Without it a slave that never
  // sends '\n' makes the String grow until the heap is exhausted.
  const size_t maxPendingChars = 4096;

  // Query the port once so the size decision and the read agree.
  const int pending = serial.available();
  if (pending > 0)
  {
    const size_t availableBytes = static_cast<size_t>(pending);
    const size_t toRead = (availableBytes < chunkSize) ? availableBytes : chunkSize;
    char chunk[chunkSize + 1];  // Extra byte for null-termination
    const size_t bytesRead = serial.readBytes(chunk, toRead);
    chunk[bytesRead] = '\0';
    buffer.concat(chunk);
  }

  // Process one complete line (if available) and then return.
  const int newlineIndex = buffer.indexOf('\n');
  if (newlineIndex != -1)
  {
    const String line = buffer.substring(0, newlineIndex);
    Serial.printf("<< %s: %s\n", portLabel, line.c_str());
    // Drop the consumed line in place instead of building a second String.
    buffer.remove(0, newlineIndex + 1);
    return;
  }

  // No newline yet: if the backlog is too large, flush it as a line so no
  // data is lost and memory use stays bounded.
  if (buffer.length() > maxPendingChars)
  {
    Serial.printf("<< %s: %s\n", portLabel, buffer.c_str());
    buffer = "";
  }
}
// Arduino-style serial hook: forwards straight to the packet reader.
// NOTE(review): presumably invoked by the core when USB serial data is
// pending — confirm how/when Teensy 4.x calls serialEvent().
void serialEvent()
{
  processIncomingData();
}

void processIncomingData()
{
  uint32_t payloadSize = 0;
  // before payload 8 bytes
  if (Serial.available() >= 8)
  {
    // Read 8 bytes of header (payload size is stored in bytes 4-7 - uint32_t)
    Serial.readBytes(reinterpret_cast<char*>(packetBuffer), 8);
    memcpy(&payloadSize, &packetBuffer[4], sizeof(payloadSize));
    uint32_t start = micros();
    Serial.readBytes(reinterpret_cast<char*>(packetBuffer + 8), payloadSize);  // Read the payload
    Serial.printf("Recv: %.2f MB/s\n", (float)payloadSize / ((micros() - start) / 1e6f) / (1024.0f * 1024.0f));
    Serial.readBytes(reinterpret_cast<char*>(&packetBuffer[8 + payloadSize]), 1);  // Read the final end byte of the packet
    // TODO : look for startbyte and keep putting till endByte is seen
    if (packetBuffer[2] < 7)
      Serial.printf(">> Routing: %d bytes to Axis: %d\n", 9 + payloadSize, packetBuffer[2]);
    routePacket(packetBuffer[2], packetBuffer, 9 + payloadSize);

    if (packetBuffer[0] == 0x01 && packetBuffer[3] == 0x80 && packetBuffer[8 + payloadSize] == 0x04)
      Reboot();
  }
}

// Deliver a complete packet to its destination.
//
// axisID 7     -> handled on this board via processPacket().
// axisID 0     -> broadcast to every enabled slave port.
// axisID 1, 2  -> slaveSerial1.
// axisID 3, 4  -> slaveSerial2.
// axisID 5, 6  -> reserved for slaveSerial3 (currently disabled, so dropped).
// anything else is ignored.
//
// fullPacket / packetSize describe the whole frame including header and end byte.
void routePacket(byte axisID, uint8_t* fullPacket, uint32_t packetSize)
{
  if (axisID == 7)
  {
    processPacket(fullPacket, packetSize);
    return;
  }

  // Write the frame to one slave port, wait for it to drain, and report throughput.
  auto forwardTo = [fullPacket, packetSize](USBSerial_BigBuffer& port)
  {
    const uint32_t t0 = micros();
    port.write(fullPacket, packetSize);
    port.flush();
    const uint32_t elapsedUs = micros() - t0;
    Serial.printf("Sent: %.2f MB/s\n", (float)packetSize / (elapsedUs / 1e6f) / (1024.0f * 1024.0f));
  };

  const bool wantsFirstSlave = (axisID == 0) || (axisID == 1) || (axisID == 2);
  const bool wantsSecondSlave = (axisID == 0) || (axisID == 3) || (axisID == 4);
  // const bool wantsThirdSlave = (axisID == 0) || (axisID == 5) || (axisID == 6);

  if (wantsFirstSlave)
    forwardTo(slaveSerial1);
  if (wantsSecondSlave)
    forwardTo(slaveSerial2);
  // if (wantsThirdSlave)
  //   forwardTo(slaveSerial3);
}

// Announce a five-second countdown on the host serial link, then reset the board.
void Reboot()
{
  Serial.println("Rebooting in...");

  int secondsLeft = 5;
  while (secondsLeft > 0)
  {
    Serial.printf("%d... \n", secondsLeft);
    delay(1000);
    --secondsLeft;
  }

  Serial.println("\nReboot");
  delay(1000);

  // NOTE(review): presumably clearing USB1_USBCMD stops the USB controller so
  // the host sees a disconnect before the reset — confirm.
  USB1_USBCMD = 0;
  // NOTE(review): 0x05FA0004 looks like the ARM AIRCR write key (0x05FA) plus
  // the SYSRESETREQ bit, i.e. a software reset request — confirm.
  SCB_AIRCR = 0x05FA0004;
}

void processPacket(uint8_t* fullPacket, uint32_t packetSize)
{
  // Packet format: startByte(1), write/read(1), axis_id(1), header(1),
  // payload_size(4), payload (N bytes), endbyte(1)
  byte header = fullPacket[3];
  switch (header)
  {
    case 0x89:
      {
        int feedRate = (int)fullPacket[8];
        Serial.printf("Feedrate: %u\n", feedRate);
        break;
      }
    default:
      Serial.printf("NotImplemented: 0x%X\n", header);
      break;
  }
}




If anyone's interested.

I still have to figure out how to detect a master Teensy USB connection drop based on an interrupt instead of polling Serial.dtr().
 
with 2-3 devices the number of messages/sec the Host can receive should be <edit> tested/observed as it is using scheduled packets on a shared connection to and from host.
 
Last edited:
Back
Top