UDP client stops receiving packets

Lateralleap

Well-known member
Hi,

I am having intermittent problems using QNEthernet to receive UDP messages on a Teensy 4.1

I am firing UDP messages (of 6008 bytes) from a C# test program at the Teensy which is running the sketch below

It works fine for a while, printing out a message at every multiple of 10000 packets

Then after some variable amount of time/ packets, it just hangs

I know the C# application is still sending packets because if I reset the Teensy, it starts receiving again

When it hangs, it must be receiving -1 from parsePacket() because I don't get a message logging any other negative return code

I am assuming the EthernetUdp object has failed in some way?

Is there a way to detect this so that I could then perhaps call .begin() again, assuming that would enable it to recover?

If I put a 1ms delay in the client sending loop (between each packet being sent), it seems to not fail but maybe I just did not test it for long enough

I had thought about increasing the queue length?

I am aware that 6008 bytes is over 4 times the MTU max packet size of 1500 bytes. However, the packet splitting/combining clearly seems to be working, to some degree at least?

Thanks




The number of packets seems to vary

HTML:
#include <QNEthernet.h>

using namespace qindesign::network;

const uint32_t kDHCPTimeout = 10000;  // 10 seconds
const uint32_t PayloadLength = 6000;  // Payload bytes in each test message
const uint32_t LocalPort = 8002;      // UDP port passed to udp.begin()

// Header placed at the start of every test message. The packed attribute
// asks the compiler not to insert padding between or after the members.
struct __attribute__((packed)) T_MessageHeader
{
  uint32_t messageId;
  uint32_t payloadLength;
};
static_assert(sizeof(T_MessageHeader) == 8,
              "T_MessageHeader must be exactly two 32-bit words");

// Complete test message: the header immediately followed by the payload.
struct __attribute__((packed)) T_TestMessage
{
  T_MessageHeader messageHeader;
  char payload[PayloadLength];
};
static_assert(sizeof(T_TestMessage) == sizeof(T_MessageHeader) + PayloadLength,
              "T_TestMessage must be header + payload with no padding");

// Receive buffer that can be addressed either as a structured message
// (testMessage) or as raw bytes (buffer). Both members have the same size,
// so a read of up to sizeof(T_TestMessage) bytes into `buffer` stays in bounds.
union __attribute__((packed)) T_MessageBuffer
{
  T_TestMessage testMessage;
  char buffer[sizeof(T_TestMessage)];
};
static_assert(sizeof(T_MessageBuffer) == sizeof(T_TestMessage),
              "T_MessageBuffer must be exactly one T_TestMessage in size");

// UDP socket the sketch listens on; opened in setup() and polled in loop().
EthernetUDP udp;
// Number of packets read successfully so far; incremented in loop().
unsigned long packetCount = 0;

// One-time initialization: starts the serial console, brings Ethernet up
// with DHCP, prints the resulting network configuration and opens the UDP
// socket on LocalPort.
//
// Returns early, leaving the UDP socket unopened, if Ethernet cannot be
// started or no address is obtained within kDHCPTimeout.
void setup()
{
  Serial.begin(115200);
  while (!Serial && millis() < 4000)
  {
    // Wait for Serial to initialize, but no longer than 4 seconds
  }
  stdPrint = &Serial;  // Make printf work (a QNEthernet feature)
  printf("Starting...\r\n");

  uint8_t mac[6];
  Ethernet.macAddress(mac);  // This is informative; it retrieves, not sets
  printf("MAC = %02x:%02x:%02x:%02x:%02x:%02x\r\n",
         mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);

  printf("Starting Ethernet with DHCP...\r\n");
  if (!Ethernet.begin())
  {
    printf("Failed to start Ethernet\r\n");
    return;
  }

  if (!Ethernet.waitForLocalIP(kDHCPTimeout))
  {
    printf("Failed to get IP address from DHCP\r\n");
    return;
  }

  IPAddress ip = Ethernet.localIP();
  printf("    Local IP    = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.subnetMask();
  printf("    Subnet mask = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.gatewayIP();
  printf("    Gateway     = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.dnsServerIP();
  printf("    DNS         = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);

  // Start UDP listening on LocalPort. Report a failure here instead of
  // silently ignoring it, otherwise loop() would just never see a packet.
  if (!udp.begin(LocalPort))
  {
    printf("Failed to start UDP listening on port %lu\r\n",
           static_cast<unsigned long>(LocalPort));
  }
}

// Polls the UDP socket once per call. A packet no larger than T_TestMessage
// is read into a local buffer and counted, and every 10000th counted packet
// is reported. Oversized packets and short reads are logged and not counted.
void loop()
{
  const int packetSize = udp.parsePacket();

  // The sketch treats -1 as "nothing to read" and stays silent for it; any
  // other negative result is unexpected and is logged.
  if ((packetSize < 0) && (packetSize != -1))
  {
    printf("Return code: %d\r\n", packetSize);
  }

  if (packetSize <= 0)
  {
    return;
  }

  if (packetSize > static_cast<int>(sizeof(T_TestMessage)))
  {
    // sizeof yields a size_t and packetSize is a signed int: cast the former
    // for %u and print the latter with %d so each argument matches its
    // conversion specifier.
    printf("Packet size error (expected: %u, actual: %d)\r\n",
           static_cast<unsigned>(sizeof(T_TestMessage)), packetSize);
    return;
  }

  T_MessageBuffer messageBuffer;

  const int dataCount = udp.read(messageBuffer.buffer, packetSize);
  if (dataCount != packetSize)
  {
    // Both values are signed; %d shows a negative read result as such
    // instead of as a huge unsigned number.
    printf("Packet read error (expected: %d, actual: %d)\r\n", packetSize, dataCount);
    return;
  }

  packetCount++;
  if ((packetCount % 10000) == 0)
  {
    printf("Packet count: %lu\r\n", packetCount);
  }
}
 
Hi,

I am having intermittent problems using QNEthernet to receive UDP messages on a Teensy 4.1

I am firing UDP messages (of 6008 bytes) from a C# test program at the Teensy which is running the sketch below

It works fine for a while, printing out a message at every multiple of 10000 packets

Then after some variable amount of time/ packets, it just hangs

I know the C# application is still sending packets because if I reset the Teensy, it starts receiving again

When it hangs, it must be receiving -1 from parsePacket() because I don't get a message logging any other negative return code

I am assuming the EthernetUdp object has failed in some way?

Is there a way to detect this so that I could then perhaps call .begin() again, assuming that would enable it to recover?

If I put a 1ms delay in the client sending loop (between each packet being sent), it seems to not fail but maybe I just did not test it for long enough

I had thought about increasing the queue length?

I am aware that 6008 bytes is over 4 times the MTU max packet size of 1500 bytes. However, the packet splitting/combining clearly seems to be working, to some degree at least?

Thanks




The number of packets seems to vary

HTML:
#include <QNEthernet.h>

using namespace qindesign::network;

const uint32_t kDHCPTimeout = 10000;  // 10 seconds
const uint32_t PayloadLength = 6000;  // Payload bytes in each test message
const uint32_t LocalPort = 8002;      // UDP port passed to udp.begin()

// Header placed at the start of every test message. The packed attribute
// asks the compiler not to insert padding between or after the members.
struct __attribute__((packed)) T_MessageHeader
{
  uint32_t messageId;
  uint32_t payloadLength;
};
static_assert(sizeof(T_MessageHeader) == 8,
              "T_MessageHeader must be exactly two 32-bit words");

// Complete test message: the header immediately followed by the payload.
struct __attribute__((packed)) T_TestMessage
{
  T_MessageHeader messageHeader;
  char payload[PayloadLength];
};
static_assert(sizeof(T_TestMessage) == sizeof(T_MessageHeader) + PayloadLength,
              "T_TestMessage must be header + payload with no padding");

// Receive buffer that can be addressed either as a structured message
// (testMessage) or as raw bytes (buffer). Both members have the same size,
// so a read of up to sizeof(T_TestMessage) bytes into `buffer` stays in bounds.
union __attribute__((packed)) T_MessageBuffer
{
  T_TestMessage testMessage;
  char buffer[sizeof(T_TestMessage)];
};
static_assert(sizeof(T_MessageBuffer) == sizeof(T_TestMessage),
              "T_MessageBuffer must be exactly one T_TestMessage in size");

// UDP socket the sketch listens on; opened in setup() and polled in loop().
EthernetUDP udp;
// Number of packets read successfully so far; incremented in loop().
unsigned long packetCount = 0;

// One-time initialization: starts the serial console, brings Ethernet up
// with DHCP, prints the resulting network configuration and opens the UDP
// socket on LocalPort.
//
// Returns early, leaving the UDP socket unopened, if Ethernet cannot be
// started or no address is obtained within kDHCPTimeout.
void setup()
{
  Serial.begin(115200);
  while (!Serial && millis() < 4000)
  {
    // Wait for Serial to initialize, but no longer than 4 seconds
  }
  stdPrint = &Serial;  // Make printf work (a QNEthernet feature)
  printf("Starting...\r\n");

  uint8_t mac[6];
  Ethernet.macAddress(mac);  // This is informative; it retrieves, not sets
  printf("MAC = %02x:%02x:%02x:%02x:%02x:%02x\r\n",
         mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);

  printf("Starting Ethernet with DHCP...\r\n");
  if (!Ethernet.begin())
  {
    printf("Failed to start Ethernet\r\n");
    return;
  }

  if (!Ethernet.waitForLocalIP(kDHCPTimeout))
  {
    printf("Failed to get IP address from DHCP\r\n");
    return;
  }

  IPAddress ip = Ethernet.localIP();
  printf("    Local IP    = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.subnetMask();
  printf("    Subnet mask = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.gatewayIP();
  printf("    Gateway     = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.dnsServerIP();
  printf("    DNS         = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);

  // Start UDP listening on LocalPort. Report a failure here instead of
  // silently ignoring it, otherwise loop() would just never see a packet.
  if (!udp.begin(LocalPort))
  {
    printf("Failed to start UDP listening on port %lu\r\n",
           static_cast<unsigned long>(LocalPort));
  }
}

// Polls the UDP socket once per call. A packet no larger than T_TestMessage
// is read into a local buffer and counted, and every 10000th counted packet
// is reported. Oversized packets and short reads are logged and not counted.
void loop()
{
  const int packetSize = udp.parsePacket();

  // The sketch treats -1 as "nothing to read" and stays silent for it; any
  // other negative result is unexpected and is logged.
  if ((packetSize < 0) && (packetSize != -1))
  {
    printf("Return code: %d\r\n", packetSize);
  }

  if (packetSize <= 0)
  {
    return;
  }

  if (packetSize > static_cast<int>(sizeof(T_TestMessage)))
  {
    // sizeof yields a size_t and packetSize is a signed int: cast the former
    // for %u and print the latter with %d so each argument matches its
    // conversion specifier.
    printf("Packet size error (expected: %u, actual: %d)\r\n",
           static_cast<unsigned>(sizeof(T_TestMessage)), packetSize);
    return;
  }

  T_MessageBuffer messageBuffer;

  const int dataCount = udp.read(messageBuffer.buffer, packetSize);
  if (dataCount != packetSize)
  {
    // Both values are signed; %d shows a negative read result as such
    // instead of as a huge unsigned number.
    printf("Packet read error (expected: %d, actual: %d)\r\n", packetSize, dataCount);
    return;
  }

  packetCount++;
  if ((packetCount % 10000) == 0)
  {
    printf("Packet count: %lu\r\n", packetCount);
  }
}

maybe put a hard cap on packetSize before calling the udp.read?
Like
if (packetSize > PayloadLength) packetSize = PayloadLength;
 
I effectively do do that. I check if packetSize is greater than the size of the expected packet structure

No I think you don’t. The size of the struct is 8 bytes more.
So you may end up writing into RAM that is not part of the struct. But just above it.
 
Sorry. I am struggling to understand what you mean. The size of the buffer I read into is the size of the structure that includes the payload and the header. The payload is immaterial in this test program. What matters is the overall size of the message being sent which is 6008 bytes
 
If packetSize would be 6001, then where in the buffer that has a only 6000 bytes capacity would the last byte end up?
 
// Excerpt of the receive-buffer union from the sketch; `buffer` is declared
// with sizeof(T_TestMessage) elements.
union __attribute__((packed)) T_MessageBuffer
{
T_TestMessage testMessage;
char buffer[sizeof(T_TestMessage)];
};

element buffer in this struct has only 6000 bytes.

beyond the 6000th buffer byte of messageBuffer.buffer, there’s nothing that your udp.read() is allowed to write to. but it will do that when packetSize is >6000…

maybe you meant to write
int dataCount = udp.read(&messageBuffer, packetSize);
where you have
int dataCount = udp.read(messageBuffer.buffer, packetSize);
 
Sorry but I think you have misunderstood the data declarations

// Number of payload bytes in a test message.
const uint32_t PayloadLength = 6000;

// Message header: two 32-bit fields, packed.
struct __attribute__((packed)) T_MessageHeader
{
uint32_t messageId;
uint32_t payloadLength;
};

// Whole message: the header followed by PayloadLength payload bytes, packed.
struct __attribute__((packed)) T_TestMessage
{
T_MessageHeader messageHeader;
char payload[PayloadLength];
};

// Two views of the same storage: the structured message, or a char array
// declared with sizeof(T_TestMessage) elements.
union __attribute__((packed)) T_MessageBuffer
{
T_TestMessage testMessage;
char buffer[sizeof(T_TestMessage)];
};

T_TestMessage is a structure that combines a header (of length 8 bytes) and a payload buffer that is declared to be of length PayloadLength (which is 6000 bytes)

So T_TestMessage is of length 8 + 6000 = 6008 bytes

The union T_MessageBuffer just provides two alternative ways to access the buffer. Either via the variable testMessage (declared as T_TestMessage so of length 6008 bytes as above) or via the variable buffer (declared as a char array of length sizeof(T_TestMessage) so also 6008 bytes)

So reading into messageBuffer.buffer is reading into that char array which is of length 6008 bytes
 
To put this discussion to bed, please see below:

2023-03-11_07-36-01.jpg
 
To put this discussion to bed, please see below:

View attachment 30582

I see.

Get https://www.wireshark.org/ and confirm that the udp packets are really there still, also when it hangs.
Next step, if that gives no hints: does it restart after unplugging and re-inserting the Ethernet cable? If yes, then consider this from Wikipedia:

End of frame – physical layer[edit]
The end of a frame is usually indicated by the end-of-data-stream symbol at the physical layer or by loss of the carrier signal; an example is 10BASE-T, where the receiving station detects the end of a transmitted frame by loss of the carrier. Later physical layers use an explicit end of data or end of stream symbol or sequence to avoid ambiguity, especially where the carrier is continually sent between frames; an example is Gigabit Ethernet with its 8b/10b encoding scheme that uses special symbols which are transmitted before and after a frame is transmitted.[6][7]

Interpacket gap – physical layer[edit]
Interpacket gap (IPG) is idle time between packets. After a packet has been sent, transmitters are required to transmit a minimum of 96 bits (12 octets) of idle line state before transmitting the next packet.

Does the Interpacket gap really meet this spec?
 
Thanks Sicco

I have already confirmed the packets are still there

Removing/ reinserting the ethernet cable does not restart it

Resetting the Teensy does restart it

Feels odd that it can typically do ~ 140,000 packets before hanging if it was an interpacket gap problem?

I still feel the EthernetUDP object is failing somehow
 
What I think is happening when it seems to hang is that it is still receiving packets but at a much slower rate

In other words, only 1 in every N packets gets through

I will do some more tests to verify this
 
I’m curious, is it possible that groups of packets come in immediately, one after the other? This is what I was seeing with pixel data (eg. sACN). It’s why I added a “queue size” parameter to another `EthernetUDP(queueSize)` constructor. It’s able to buffer _N_ packets in a circular buffer. What happens if you use a queue? For example:
`EthernetUDP udp(64)`
 
Hi Shawn

I did try queue size in an earlier iteration but just of size 2. I will try 64

However the packet source is a C# application running in a tight loop so not sure why packets would be bunching up and why it would work for 120K packets and then fail

Also, once it gets into this state, it never recovers. I need to look at the packet rate to prove that it enters a packet loss state

Feels like some sort of resource exhaustion or corruption

I will try queue size and packet rate logging

Thanks
 
Entirely possible. I’ll look more closely when I have a chance. Let me just clarify something: does it actually hang or is it just really slow?
 
I added some code to print the total packet count received and the rate/s every 5 seconds at the end of the main loop

It seems to die after about 40000 packets have been received

It is no longer executing the main loop because I get no more output

Here is the code

HTML:
#include <QNEthernet.h>

using namespace qindesign::network;

const uint32_t kDHCPTimeout = 10000;  // 10 seconds
const uint32_t PayloadLength = 6000;  // Payload bytes in each test message
const uint32_t LocalPort = 8002;      // UDP port passed to udp.begin()
const uint32_t PacketTimeoutMillis = 5000;  // Only referenced by commented-out code in loop()

// Header placed at the start of every test message. The packed attribute
// asks the compiler not to insert padding between or after the members.
struct __attribute__((packed)) T_MessageHeader
{
  uint32_t messageId;
  uint32_t payloadLength;
};
static_assert(sizeof(T_MessageHeader) == 8,
              "T_MessageHeader must be exactly two 32-bit words");

// Complete test message: the header immediately followed by the payload.
struct __attribute__((packed)) T_TestMessage
{
  T_MessageHeader messageHeader;
  char payload[PayloadLength];
};
static_assert(sizeof(T_TestMessage) == sizeof(T_MessageHeader) + PayloadLength,
              "T_TestMessage must be header + payload with no padding");

// Receive buffer that can be addressed either as a structured message
// (testMessage) or as raw bytes (buffer). Both members have the same size,
// so a read of up to sizeof(T_TestMessage) bytes into `buffer` stays in bounds.
union __attribute__((packed)) T_MessageBuffer
{
  T_TestMessage testMessage;
  char buffer[sizeof(T_TestMessage)];
};
static_assert(sizeof(T_MessageBuffer) == sizeof(T_TestMessage),
              "T_MessageBuffer must be exactly one T_TestMessage in size");

// UDP socket the sketch listens on; opened in setup() and polled in loop().
EthernetUDP udp;
// Packets read since the last rate report; reset to 0 after each report.
unsigned long ratePacketCount;
// millis() value captured at the last rate report (initially in setup()).
unsigned long rateMillis;
// Total packets read successfully; set to 0 in setup().
unsigned long packetCount;

// One-time initialization: starts the serial console, brings Ethernet up
// with DHCP, prints the resulting network configuration, opens the UDP
// socket on LocalPort and resets the packet/rate counters.
//
// Returns early, leaving the UDP socket unopened and the counters untouched,
// if Ethernet cannot be started or no address is obtained within
// kDHCPTimeout.
void setup()
{
  Serial.begin(115200);
  while (!Serial && millis() < 4000)
  {
    // Wait for Serial to initialize, but no longer than 4 seconds
  }
  stdPrint = &Serial;  // Make printf work (a QNEthernet feature)
  printf("Starting...\r\n");

  uint8_t mac[6];
  Ethernet.macAddress(mac);  // This is informative; it retrieves, not sets
  printf("MAC = %02x:%02x:%02x:%02x:%02x:%02x\r\n",
         mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);

  printf("Starting Ethernet with DHCP...\r\n");
  if (!Ethernet.begin())
  {
    printf("Failed to start Ethernet\r\n");
    return;
  }

  if (!Ethernet.waitForLocalIP(kDHCPTimeout))
  {
    printf("Failed to get IP address from DHCP\r\n");
    return;
  }

  IPAddress ip = Ethernet.localIP();
  printf("    Local IP    = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.subnetMask();
  printf("    Subnet mask = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.gatewayIP();
  printf("    Gateway     = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.dnsServerIP();
  printf("    DNS         = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);

  // Start UDP listening on LocalPort. Report a failure here instead of
  // silently ignoring it, otherwise loop() would just never see a packet.
  if (!udp.begin(LocalPort))
  {
    printf("Failed to start UDP listening on port %lu\r\n",
           static_cast<unsigned long>(LocalPort));
  }

  packetCount = 0;
  ratePacketCount = 0;
  rateMillis = millis();
}

void loop()
{
  int packetSize = udp.parsePacket();

  if (packetSize == -1)
  {
    //if ((millis() - lastPacketMillis) >= PacketTimeoutMillis)
    //{
    //  printf("Packet timeout\r\n");
    //  udp.begin(LocalPort);
    //  lastPacketMillis = millis();
    //}
  }
  else if (packetSize < 0)
  {
    printf("Return code: %d\r\n", packetSize);
  }
  else if (packetSize > 0)
  {
    if (packetSize > (int) sizeof(T_TestMessage))
    {
      printf("Packet size error (expected: %u, actual: %u)\r\n", sizeof(T_TestMessage), packetSize);
    }
    else
    {
      T_MessageBuffer messageBuffer;;

      int dataCount = udp.read(messageBuffer.buffer, packetSize);
      if (dataCount != packetSize)
      {
        printf("Packet read error (expected: %u, actual: %u)\r\n", packetSize, dataCount);
      }
      else
      {
        ratePacketCount++;
        packetCount++;
      }
    }
  }

  if ((millis() - rateMillis) >= 5000)
  {
    unsigned long rate = ratePacketCount / 5;
    printf("Packet count: %lu Rate: %lu\r\n", packetCount, rate);
    ratePacketCount = 0;
    rateMillis = millis();
  }
}
 
Some questions: Where exactly does it freeze? If it’s actually crashing, does the Teensy restart? What does printing the CrashReport show at program start?
 
Here is CrashReport at startup:

09:20:01.779 -> Hopefully all is well, but certain types of crashes can't be reported:
09:20:01.779 -> stuck in an infinite loop (technically, hardware still running properly)
09:20:01.779 -> remaining in a low power sleep mode
09:20:01.779 -> access to certain peripherals without their clock enabled (eg, FlexIO)
09:20:01.779 -> change of CPU or bus clock speed without use of glitchless mux
09:20:01.779 -> Breadcrumb #1 was 3361759250 (0xC8606012)
09:20:01.779 -> Breadcrumb #6 was 2528204831 (0x96B1581F)

I added code to the main loop to print out that it was being called every 10 seconds

When it dies, this time after 335,000 packets received, it no longer prints that keep-alive message. So the main loop is no longer being called or else something has happened to serial communication

The receive rate is very constant around 1800 packets per second right up until the time it dies

If I do a reset, still the same null CrashReport (just no breadcrumb lines) and it starts receiving again
 
Hi Shawn

Another thought. Given that I am sending quite large packets (6008 bytes) much bigger than the MTU max packet size, might there be an issue with the splitting/ combining logic?
 
What I’m gathering from you is there’s no crash or hang. I’m not certain where your program is stopping. What does the current code look like, and where exactly is it apparently freezing? For example, if the program isn’t continuing, then a “print” after every line will certainly stop at some point. Or… are you saying that after a certain point, parsePacket() always returns something negative?

Sending packets larger than the MTU will result in IP reassembly of multiple packets. I wonder how robust lwIP’s reassembly is?
 
I can certainly instrument the code more. The problem is the packet rate and the print statements will slow down the rate and may mask the problem. Hence why I am printing the keep alive every 10 seconds

I will do more instrumentation and let you know the result
 
Here is the latest code

HTML:
#include <QNEthernet.h>

using namespace qindesign::network;

const uint32_t kDHCPTimeout = 10000;  // 10 seconds
const uint32_t PayloadLength = 6000;  // Payload bytes in each test message
const uint32_t LocalPort = 8002;      // UDP port passed to udp.begin()
const uint32_t PacketTimeoutMillis = 5000;  // Only referenced by commented-out code in loop()

// Header placed at the start of every test message. The packed attribute
// asks the compiler not to insert padding between or after the members.
struct __attribute__((packed)) T_MessageHeader
{
  uint32_t messageId;
  uint32_t payloadLength;
};
static_assert(sizeof(T_MessageHeader) == 8,
              "T_MessageHeader must be exactly two 32-bit words");

// Complete test message: the header immediately followed by the payload.
struct __attribute__((packed)) T_TestMessage
{
  T_MessageHeader messageHeader;
  char payload[PayloadLength];
};
static_assert(sizeof(T_TestMessage) == sizeof(T_MessageHeader) + PayloadLength,
              "T_TestMessage must be header + payload with no padding");

// Receive buffer that can be addressed either as a structured message
// (testMessage) or as raw bytes (buffer). Both members have the same size,
// so a read of up to sizeof(T_TestMessage) bytes into `buffer` stays in bounds.
union __attribute__((packed)) T_MessageBuffer
{
  T_TestMessage testMessage;
  char buffer[sizeof(T_TestMessage)];
};
static_assert(sizeof(T_MessageBuffer) == sizeof(T_TestMessage),
              "T_MessageBuffer must be exactly one T_TestMessage in size");

// UDP socket the sketch listens on; opened in setup() and polled in loop().
EthernetUDP udp;
// Packets read since the last rate report; reset to 0 after each report.
unsigned long ratePacketCount;
// millis() value captured at the last rate report (initially in setup()).
unsigned long rateMillis;
// Total packets read successfully; set to 0 in setup().
unsigned long packetCount;
// millis() value captured at the last "Alive!" message (initially in setup()).
unsigned long aliveMillis;
// Set in loop() when a read was attempted on that pass; gates the "1", "2"
// and "5" trace output. Not explicitly initialized, so as a global it starts
// out false.
bool printed;

// One-time initialization: starts the serial console, prints the
// CrashReport, brings Ethernet up with DHCP, prints the resulting network
// configuration, opens the UDP socket on LocalPort and resets the counters
// and timers used by loop().
//
// Returns early, leaving the UDP socket unopened and the counters untouched,
// if Ethernet cannot be started or no address is obtained within
// kDHCPTimeout.
void setup()
{
  Serial.begin(115200);
  while (!Serial && millis() < 4000)
  {
    // Wait for Serial to initialize, but no longer than 4 seconds
  }
  stdPrint = &Serial;  // Make printf work (a QNEthernet feature)
  printf("Starting...\r\n");

  Serial.print(CrashReport);

  uint8_t mac[6];
  Ethernet.macAddress(mac);  // This is informative; it retrieves, not sets
  printf("MAC = %02x:%02x:%02x:%02x:%02x:%02x\r\n",
         mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);

  printf("Starting Ethernet with DHCP...\r\n");
  if (!Ethernet.begin())
  {
    printf("Failed to start Ethernet\r\n");
    return;
  }

  if (!Ethernet.waitForLocalIP(kDHCPTimeout))
  {
    printf("Failed to get IP address from DHCP\r\n");
    return;
  }

  IPAddress ip = Ethernet.localIP();
  printf("    Local IP    = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.subnetMask();
  printf("    Subnet mask = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.gatewayIP();
  printf("    Gateway     = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);
  ip = Ethernet.dnsServerIP();
  printf("    DNS         = %u.%u.%u.%u\r\n", ip[0], ip[1], ip[2], ip[3]);

  // Start UDP listening on LocalPort. Report a failure here instead of
  // silently ignoring it, otherwise loop() would just never see a packet.
  if (!udp.begin(LocalPort))
  {
    printf("Failed to start UDP listening on port %lu\r\n",
           static_cast<unsigned long>(LocalPort));
  }

  packetCount = 0;
  ratePacketCount = 0;
  rateMillis = millis();
  aliveMillis = millis();
}

void loop()
{
  if (printed)
    printf("1");

  int packetSize = udp.parsePacket();
  if (printed)
    printf("2");

  printed = false;

  if ((millis() - aliveMillis) >= 10000)
  {
    printf("Alive! PacketSize: %d\r\n", packetSize);
    aliveMillis = millis();
  }


  if (packetSize == -1)
  {
    //if ((millis() - lastPacketMillis) >= PacketTimeoutMillis)
    //{
    //  printf("Packet timeout\r\n");
    //  udp.begin(LocalPort);
    //  lastPacketMillis = millis();
    //}
  }
  else if (packetSize < 0)
  {
    printf("Return code: %d\r\n", packetSize);
  }
  else if (packetSize > 0)
  {
    if (packetSize > (int) sizeof(T_TestMessage))
    {
      printf("Packet size error (expected: %u, actual: %u)\r\n", sizeof(T_TestMessage), packetSize);
    }
    else
    {
      T_MessageBuffer messageBuffer;;

      printf("3");
      printed = true;
      int dataCount = udp.read(messageBuffer.buffer, packetSize);
      printf("4");
      printed = true;

      if (dataCount != packetSize)
      {
        printf("Packet read error (expected: %u, actual: %u)\r\n", packetSize, dataCount);
      }
      else
      {
        ratePacketCount++;
        packetCount++;
      }
    }
  }

  if ((millis() - rateMillis) >= 5000)
  {
    unsigned long rate = ratePacketCount / 5;
    printf("Packet count: %lu Rate: %lu\r\n", packetCount, rate);
    ratePacketCount = 0;
    rateMillis = millis();
  }

  if (printed)
  {
    printf("5\r\n");
  }
}

You will see that I am only printing out 1 or 2 if this follows a previous packet reception. This is to limit a mass of 1s and 2s when there is no packet to read

What I get is a stream of complete 12345 sequences (obviously the first one is 345)

What I don't get is

...
12345
12345
1

or

...
12345
12345
12

I believe this means it is hanging after receiving a packet but then the main loop is not called again or else I would get a 1 and perhaps a 2
 
Back
Top