I was experimenting with the Teensy4 implementation of the HardwareSerial read ring buffer.
I noticed that there is a primary buffer that is always used, and an additional
buffer whose size is set using addMemoryForRead.
As for the rest of the implementation, it does not seem to change much compared to Arduino:
byte-level functions are in HardwareSerialX, and higher-level read functions are in Stream.cpp.
Since there may be cases where reads on the serial ports are infrequent and the read ring buffer is provisioned with a large size, such as > 1 KB,
it would be nice to have higher-level functions (in Stream.cpp) that do not rely on a loop calling read() n times to fetch all available bytes,
but instead fetch up to n bytes (or all available bytes) in one call, or fetch bytes until a provided separator character is found, and then to compare the performance.
I made some tests about such a "get_" function, using HardwareSerial read() as a basis.
I am currently testing for dropped characters with the new get_ function, using a variable window size to ensure that all branches of the memchrcpy logic are exercised and free of bugs.
As for performance: for now, the gains of get_ over a read() loop seem significant already at a few tens of bytes read. This will be the subject of a follow-up.
Higher level functions that use the low level get_ would live inside Stream.cpp
Such as :
I noticed that there is a primary buffer that is always used, and an additional
buffer whose size is set using addMemoryForRead.
As for the rest of the implementation, it does not seem to change much compared to Arduino:
byte-level functions are in HardwareSerialX, and higher-level read functions are in Stream.cpp.
Since there may be cases where reads on the serial ports are infrequent and the read ring buffer is provisioned with a large size, such as > 1 KB,
it would be nice to have higher-level functions (in Stream.cpp) that do not rely on a loop calling read() n times to fetch all available bytes,
but instead fetch up to n bytes (or all available bytes) in one call, or fetch bytes until a provided separator character is found, and then to compare the performance.
I made some tests about such a "get_" function, using HardwareSerial read() as a basis.
- It uses memcpy / memchr for memory copy and separator scanning in a single function.
- Disables interrupts across every scope/branch that uses "memchrcpy", since the buffers are volatile and we don't want the RX FIFO interrupt to modify the ring buffer while we copy. This may become the limiting factor for large copies, because interrupts stay disabled for the whole duration of the memcpy.
- Takes both buffers into account for the memcpy / memchr operations: the primary buffer (rx_buffer_size_ bytes) and the extended buffer (up to rx_buffer_total_size_)
- No change in flow control pin (cts/rts) behaviour
I am currently testing for dropped characters with the new get_ function, using a variable window size to ensure that all branches of the memchrcpy logic are exercised and free of bugs.
As for performance: for now, the gains of get_ over a read() loop seem significant already at a few tens of bytes read. This will be the subject of a follow-up.
C++:
// Copy helper that can optionally stop at a separator byte.
//
// dst, src : destination and source byte ranges (must not overlap, as with memcpy).
// length   : number of bytes available in src / writable in dst.
// use_sep  : when true, scan src for the first byte equal to *sep.
// sep      : pointer to the separator byte; only sep[0] is used.
//
// Returns the number of bytes copied up to AND including the separator when
// one is found (always >= 1), or 0 when no separator was found / requested.
// In the 0 case all `length` bytes have still been copied, so callers must
// use their own `length` as the byte count.
//
// A non-positive length or a null dst/src copies nothing and returns 0.
// A null sep with use_sep == true is treated as "no separator" instead of
// dereferencing the null pointer.
inline int HardwareSerialIMXRT::memchrcpy(uint8_t* dst, uint8_t* src, int length, bool use_sep = false, const char* sep = "\x0A")
{
	// Guard first: a negative int would become a huge size_t inside memchr/memcpy.
	if (dst == nullptr || src == nullptr || length <= 0) {
		return 0;
	}
	const size_t count = static_cast<size_t>(length);

	if (use_sep && sep != nullptr) {
		const uint8_t* hit = static_cast<const uint8_t*>(memchr(src, *sep, count));
		if (hit != nullptr) {
			// Copy the bytes before the separator plus the separator itself.
			const size_t upto = static_cast<size_t>(hit - src) + 1;
			memcpy(dst, src, upto);
			return static_cast<int>(upto);
		}
	}

	// No separator requested or none present: plain copy of the whole range.
	memcpy(dst, src, count);
	return 0;
}
// Get up to n bytes of data from the receive buffer and place them into the provided buffer. If use_sep is true, then look for the separator character and stop copying if it is found. Return the number of bytes copied into the buffer which could be less than length if the separator character was found or if there was not that much data available.
int HardwareSerialIMXRT::get_(uint8_t* bytes, int length, bool use_sep = false, const char* sep = "\x0A")
{
IMXRT_LPUART_t *port = (IMXRT_LPUART_t *)port_addr;
uint32_t head, tail;
int c;
int newlength = 0;
__disable_irq();
uint32_t cycles = ARM_DWT_CYCCNT;
head = rx_buffer_head_;
tail = rx_buffer_tail_;
//debug = rx_buffer_tail_;
if (head == tail) {
// empty Now check for stuff in FIFO Queue.
c = -1; // assume nothing to return
if (port->WATER & 0x7000000) {
uint32_t newhead;
c = port->DATA & 0x3ff; // Use only up to 10 bits of data
newhead = head + 1;
if (newhead >= rx_buffer_total_size_) newhead = 0;
if (newhead != rx_buffer_tail_) {
head = newhead;
if (newhead < rx_buffer_size_) {
rx_buffer_[head] = c;
} else {
rx_buffer_storage_[head-rx_buffer_size_] = c;
}
}
rx_buffer_head_ = newhead;
head = newhead;
}
}
//if (++tail >= rx_buffer_total_size_) tail = 0;
int avail;
if (head >= tail)
{
avail = head - tail;
length = (avail > length) ? length : avail;
if (tail + length <= rx_buffer_size_)
{
newlength = memchrcpy(bytes, (uint8_t*) &(rx_buffer_[tail]), length, use_sep, sep);
if(newlength) {length = newlength; goto func_exit;}
//c= -2; // debug
}
else if(tail < rx_buffer_size_ )
{
if(tail + length > rx_buffer_size_)
{
uint8_t first_part_length = rx_buffer_size_ - tail;
newlength = memchrcpy(bytes, (uint8_t*) &(rx_buffer_[tail]), first_part_length, use_sep, sep);
if(newlength) {length = newlength; goto func_exit;}
newlength = memchrcpy(bytes + first_part_length, (uint8_t*) &(rx_buffer_storage_[0]), length - first_part_length, use_sep, sep);
if(newlength) {length = first_part_length + newlength; goto func_exit;}
//c = -3; // debug
}
else
{
newlength = memchrcpy(bytes, (uint8_t*) &(rx_buffer_[tail]), length, use_sep, sep);
if(newlength) {length = newlength; goto func_exit;}
//c = -4; // debug
}
}
else // tail >= rx_buffer_size_
{
newlength = memchrcpy(bytes, (uint8_t*) &(rx_buffer_storage_[tail - rx_buffer_size_]), length, use_sep, sep);
if(newlength) {length = newlength; goto func_exit;}
//c = -5; // debug
}
}
else // head < tail
{
avail = rx_buffer_total_size_ + head - tail;
length = (avail > length) ? length : avail;
if(tail >= rx_buffer_size_)
{
if(tail + length <= rx_buffer_total_size_)
{
newlength = memchrcpy(bytes, (uint8_t*) &(rx_buffer_storage_[tail - rx_buffer_size_]), length, use_sep, sep);
if(newlength) {length = newlength; goto func_exit;}
//c = -6; // debug
}
else // tail + length > rx_buffer_total_size_
{
newlength = memchrcpy(bytes, (uint8_t*) &(rx_buffer_storage_[tail - rx_buffer_size_]), rx_buffer_total_size_ - tail, use_sep, sep);
if(newlength) {length = newlength; goto func_exit;}
if(length - (rx_buffer_total_size_ - tail) > rx_buffer_size_)
{
newlength = memchrcpy(bytes + (rx_buffer_total_size_ - tail), (uint8_t*) &(rx_buffer_[0]), rx_buffer_size_, use_sep, sep);
if(newlength) {length = rx_buffer_total_size_ - tail + newlength; goto func_exit;}
newlength = memchrcpy(bytes + (rx_buffer_total_size_ - tail) + rx_buffer_size_, (uint8_t*) &(rx_buffer_storage_[0]), length - (rx_buffer_total_size_ - tail) - rx_buffer_size_, use_sep, sep);
if(newlength) {length = (rx_buffer_total_size_ - tail) + rx_buffer_size_ + newlength; goto func_exit;}
//c = -7; // debug
}
else
{
newlength = memchrcpy(bytes + (rx_buffer_total_size_ - tail), (uint8_t*) &(rx_buffer_[0]), length - (rx_buffer_total_size_ - tail), use_sep, sep);
if(newlength) {length = rx_buffer_total_size_ - tail + newlength; goto func_exit;}
//c = -8; // debug
}
}
}
else // tail < rx_buffer_size_
{
if(tail + length <= rx_buffer_size_)
{
newlength = memchrcpy(bytes, (uint8_t*) &(rx_buffer_[tail]), length, use_sep, sep);
if(newlength) {length = newlength; goto func_exit;}
//c = -9; // debug
}
else // tail + length > rx_buffer_size_
{
if(tail + length > rx_buffer_total_size_)
{
newlength = memchrcpy(bytes, (uint8_t*) &(rx_buffer_[tail]), rx_buffer_size_ - tail, use_sep, sep);
if(newlength) {length = newlength; goto func_exit;}
newlength = memchrcpy(bytes + (rx_buffer_size_ - tail), (uint8_t*) &(rx_buffer_storage_[0]), rx_buffer_total_size_ - rx_buffer_size_, use_sep, sep);
if(newlength) {length = (rx_buffer_size_ - tail) + newlength; goto func_exit;}
newlength = memchrcpy(bytes + (rx_buffer_total_size_ - tail), (uint8_t*) &(rx_buffer_[0]), length - (rx_buffer_total_size_ - tail), use_sep, sep);
if(newlength) {length = (rx_buffer_total_size_ - tail) + newlength; goto func_exit;}
//c = -10; // debug
}
else // tail + length <= rx_buffer_total_size_
{
newlength = memchrcpy(bytes, (uint8_t*) &(rx_buffer_[tail]), rx_buffer_size_ - tail, use_sep, sep);
if(newlength) {length = newlength; goto func_exit;}
newlength = memchrcpy(bytes + (rx_buffer_size_ - tail), (uint8_t*) &(rx_buffer_storage_[0]), length - (rx_buffer_size_ - tail), use_sep, sep);
if(newlength) {length = (rx_buffer_size_ - tail) + newlength; goto func_exit;}
//c = -11; // debug
}
}
}
}
func_exit:
//debug2 = rx_buffer_tail_;
//debug = ARM_DWT_CYCCNT - cycles;
//debug2 = ARM_DWT_CYCCNT - cycles;
rx_buffer_tail_ = (rx_buffer_tail_ + length >= rx_buffer_total_size_) ? rx_buffer_tail_ + length - rx_buffer_total_size_ : rx_buffer_tail_ + length;
__enable_irq();
return length;
//return c; // debug
}
Higher level functions that use the low level get_ would live inside Stream.cpp
Such as :
C++:
// Separator-terminated bulk read: forwards to get_() with separator scanning
// switched on. `sep` points to the separator byte (line feed by default).
// Returns whatever get_() returns.
// NOTE(review): relies on a get_() that is callable from Stream - the
// declaration is not part of this snippet.
int Stream::getuntil(uint8_t* bytes, int length, const char* sep = "\x0A")
{
	const bool stop_at_separator = true;
	const int copied = get_(bytes, length, stop_at_separator, sep);
	return copied;
}
// Plain bulk read: forwards to get_() with separator scanning switched off
// and no separator pointer. Returns whatever get_() returns.
// NOTE(review): relies on a get_() that is callable from Stream - the
// declaration is not part of this snippet.
int Stream::get(uint8_t* bytes, int length)
{
	const bool stop_at_separator = false;
	const int copied = get_(bytes, length, stop_at_separator, nullptr);
	return copied;
}