I don't think it uses all 8 stacks if you use fewer threads. However, before I used threads in my dual-LCD setup, the jitter was very noticeable when streaming the animations to both LCDs from a single loop. Now the speed is so fast it's as if only one LCD is connected, but there are actually two, both running at top speed.
/**
 * Per-thread bookkeeping record.
 *
 * Every member now has an in-class initializer so a freshly constructed
 * (including heap-allocated) ThreadInfo never exposes indeterminate values;
 * previously stack_size, sp and ticks were left uninitialized.
 */
class ThreadInfo {
public:
  int stack_size = 0;        // size of the stack buffer, in bytes
  uint8_t *stack = nullptr;  // start of the stack buffer; null until one is assigned
  int my_stack = 0;          // presumably non-zero when the library owns `stack` -- TODO confirm
  software_stack_t save;     // saved register set for this thread
  volatile int flags = 0;    // thread state flags; volatile because it is read/written across contexts -- TODO confirm
  int priority = 0;
  void *sp = nullptr;        // presumably the thread's saved stack pointer -- TODO confirm
  int ticks = 0;             // presumably the time-slice length in scheduler ticks -- TODO confirm
};
// Function-pointer aliases for the three thread entry-point signatures.
using ThreadFunction = void (*)(void*);   // entry point taking an opaque pointer argument
using ThreadFunctionInt = void (*)(int);  // entry point taking an int argument
using ThreadFunctionNone = void (*)();    // entry point taking no argument
// Register save area for one thread: one 32-bit slot per saved register.
// Without __ARM_PCS_VFP this is 9 words (36 bytes); with it, 42 words
// (168 bytes) because the floating-point registers are included as well.
// NOTE(review): the field order is presumably relied on by the context-switch
// code -- do not reorder without checking it.
typedef struct {
// Core registers r4-r11.
uint32_t r4;
uint32_t r5;
uint32_t r6;
uint32_t r7;
uint32_t r8;
uint32_t r9;
uint32_t r10;
uint32_t r11;
// Link register.
uint32_t lr;
// Floating-point state; only present when the target defines __ARM_PCS_VFP.
#ifdef __ARM_PCS_VFP
uint32_t s0;
uint32_t s1;
uint32_t s2;
uint32_t s3;
uint32_t s4;
uint32_t s5;
uint32_t s6;
uint32_t s7;
uint32_t s8;
uint32_t s9;
uint32_t s10;
uint32_t s11;
uint32_t s12;
uint32_t s13;
uint32_t s14;
uint32_t s15;
uint32_t s16;
uint32_t s17;
uint32_t s18;
uint32_t s19;
uint32_t s20;
uint32_t s21;
uint32_t s22;
uint32_t s23;
uint32_t s24;
uint32_t s25;
uint32_t s26;
uint32_t s27;
uint32_t s28;
uint32_t s29;
uint32_t s30;
uint32_t s31;
// Floating-point status and control register.
uint32_t fpscr;
#endif
} software_stack_t;
Thanks for all your ideas. I guess 12K of storage is a bit much if you're not using it all. Of course, the part inside the `#ifdef __ARM_PCS_VFP` block is only compiled for CPUs with floating point, so for the Teensy 3.2, LC, etc., it uses quite a bit less memory.
In any case, here is my thinking. One possibility is we can put the state on the stack so there is only one allocation. Or we can allocate a ThreadInfo every time a thread is created (and release when done). These scenarios are problematic because even if the thread is finished, you should still be able to inquire about its state. So when do you deallocate the ThreadInfo structure? At the very least, I can move the "save" structure that stores the registers to the stack. That doesn't need to survive the end of the thread. Most implementations of threads do this. I didn't in order to simplify debugging.
Right now, the library has a fixed array and just overwrites unused ThreadInfo items. We could change that to a linked list that allocates new ThreadInfo items for new threads when all existing items are in use. When threads stop, the library just marks their items as empty, and new threads then reuse these empty items. In this way, ThreadInfo items are allocated but never deallocated.
/**
 * Proposed compact per-thread record for a linked-list based thread table.
 *
 * Changes from the first draft:
 *  - declared as a named struct; an unnamed struct introduced via typedef
 *    should not carry member initializers or non-trivial members such as String;
 *  - stack_size is an int (as in ThreadInfo) because an 8-bit field cannot
 *    describe a stack larger than 255 bytes;
 *  - stack starts out null instead of pointing at the stack_size member;
 *  - every pointer and counter has a defined initial value.
 */
struct threadInfo {
  int stack_size = 0;                     // how much memory the stack allocation needs, in bytes
  uint8_t *stack = nullptr;               // memory given to the thread; null until assigned
  uint8_t my_stack = 0;
  software_stack_t *softStack = nullptr;  // saved-register area this thread uses; records of "dead" threads can be reused
  volatile uint8_t states = 0;            // ([FIRST_RUN], [STARTED], [STOPPED]) 3 flags, 1 byte
  volatile uint8_t threadStates = 0;      // ([EMPTY], [RUNNING], [ENDED], [ENDING], [JOIN_WAIT], [PAUSED], [RESUMING]) 7 flags, 1 byte
  uint8_t priority = 0;
  void *sp = nullptr;
  int ticks = 0;                          // open question from the draft: should this be atomic?
  // Human-readable name, for debugging code with a large number of threads.
  String threadName;
};
// Only a pointer to ThreadInfo is stored here, so a forward declaration suffices.
class ThreadInfo;

/**
 * Node of a doubly linked list of thread records.
 *
 * Declared as a plain struct (the former `typedef` had no alias name and was
 * ignored by the compiler), and every member starts out zero/null so a new
 * node is safely unlinked until it is inserted.
 */
struct threadNode {
  int numTotalThreads = 0;           // total length of the list (running threads or otherwise)
  threadNode *prev = nullptr;        // previous node, or null
  ThreadInfo *threadInfo = nullptr;  // thread record carried by this node
  threadNode *next = nullptr;        // next node, or null
};
// Presumably the first and last nodes of the thread list; both start out null (empty list).
threadNode *head = nullptr;
threadNode *tail = nullptr;
Would it be possible (and would it make sense) to have the user code create the static stack space needed for each thread and pass a pointer to it for use in the linked list as threads get created? Optionally, if the user passes a NULL pointer, the library could do the allocation when the thread is created.
/**
 * Thread body that blinks the internal LED forever.
 *
 * Configures LED_PIN_INTERNAL as an output, then alternates the pin between
 * LOW and HIGH (starting with LOW), calling threads.delay(500) after each
 * write. Never returns.
 */
void heartbeat()
{
    constexpr int kHalfPeriod = 500;  // argument passed to threads.delay() between toggles

    pinMode(LED_PIN_INTERNAL, OUTPUT);

    bool driveHigh = false;  // the first write drives the pin LOW
    for (;;)
    {
        digitalWrite(LED_PIN_INTERNAL, driveHigh ? HIGH : LOW);
        threads.delay(kHalfPeriod);
        driveHigh = !driveHigh;
    }
}
/**
 * Thread body: frames commands arriving on Serial and hands them to the
 * processing thread. Never returns.
 *
 * The last four received bytes are kept in a sliding 32-bit window (newest
 * byte in the most significant position). Once the window equals
 * COMMAND_CODES_COMMON_COMMAND_START, bytes are stored into m_inputData until
 * the window equals COMMAND_CODES_COMMON_COMMAND_END or the buffer is full.
 * A completed frame is copied into m_dataToBeProcessed under the
 * m_lockCopyData / m_lockProcessData hand-shake and the input buffer is
 * cleared.
 *
 * Fix: the received byte is widened through uint8_t/uint32_t before the
 * shift. The previous `char << 24` shifted a promoted int, which is undefined
 * for bytes >= 0x80 on targets where plain char is signed.
 */
void BaseControl::readSerialData()
{
    uint32_t dataRead = 0x00;        // sliding window over the last four bytes received
    char serialDataCharRead = 0x00;
    bool storeData = false;          // true while inside a frame (start marker seen, end marker not yet)
    bool serialDataReadComplete = false;

    while (true)
    {
        while (Serial.available())
        {
            if (m_inputData.index >= sizeof(m_inputData.buffer))
            {
                // Can't store any more data, need to process what is in the buffer
                serialDataReadComplete = true;
                storeData = false;
                break;
            }

            serialDataCharRead = Serial.read();

            // Shift the new byte into the top of the window using unsigned arithmetic only.
            const uint32_t newestByte = static_cast<uint8_t>(serialDataCharRead);
            dataRead = (newestByte << 24) | (dataRead >> 8);

            if (storeData)
            {
                m_inputData.buffer[m_inputData.index] = serialDataCharRead;
                m_inputData.index++;

                if (dataRead == COMMAND_CODES_COMMON_COMMAND_END)
                {
                    serialDataReadComplete = true;
                    storeData = false;
                    break;
                }
            }

            if (!storeData && dataRead == COMMAND_CODES_COMMON_COMMAND_START)
            {
                storeData = true;

                // Copy the command start value into buffer to allow data structure reuse
                memcpy((void*) m_inputData.buffer, (void *) &dataRead, sizeof(dataRead));
                m_inputData.index = sizeof(dataRead);
            }
        }

        if (serialDataReadComplete)
        {
            // Wait for the other thread to signal to copy the serial data in to the buffer
            m_lockCopyData.lock();

            m_dataToBeProcessed = m_inputData;

            // Signal the other thread that the copy of the serial data in to the buffer is done
            m_lockProcessData.unlock();

            // Clear read buffer
            m_inputData.clear();
            serialDataReadComplete = false;
        }
    }
}
/**
 * Thread body: waits for a command copied by the serial reader and
 * dispatches it. Never returns.
 *
 * Each iteration releases m_lockCopyData (allowing the reader to copy a new
 * command), then blocks on m_lockProcessData until that copy is done. A
 * non-empty m_dataToBeProcessed is dispatched by command code and then
 * cleared.
 */
void BaseControl::processBaseCommand()
{
    for (;;)
    {
        // Signal the other thread to copy the new command in to the buffer
        m_lockCopyData.unlock();

        // Wait for the other thread to signal the copy of the serial data in to the buffer is done
        m_lockProcessData.lock();

        const bool hasCommand = m_dataToBeProcessed.index > 0;
        if (!hasCommand)
        {
            continue;
        }

        const uint32_t commandCode = getCommandCode();
        if (commandCode == COMMAND_CODES_COMMON_SUB_SYSTEM_ID)
        {
            processSubsystemIDRequest();
        }
        else
        {
            // Every other code is forwarded to the subsystem handler.
            processSubsystemCommand(commandCode);
        }

        // Clear the decode data buffer
        m_dataToBeProcessed.clear();
    }
}