Forum Rule: Always post complete source code & details to reproduce any issue!
Page 1 of 2 1 2 LastLast
Results 1 to 25 of 43

Thread: Teensy 4.0 DMA SPI?

  1. #1
    Senior Member
    Join Date
    Mar 2015
    Posts
    159

    Teensy 4.0 DMA SPI?

    I have connected a Sharp Memory Display (400 x 240) to a Teensy 4.0 and it works great. It is very fast and responsive, especially when run at 10 MHz SPI; it seems to run even at 14 MHz, although the specified maximum is only 2 MHz.

    Anyway, it works great, but the display update blocks quite a lot the other tasks, like filtering analogue inputs.

    I have been trying to find some simple examples of how to use DMA on SPI with Teensy 4.0; I found a couple of posts but nothing really useful.

    How is the DMA SPI working on Teensy 4.0? any good examples. or would there be other methods to make the SPI transfer more efficient, not faster but less resources consuming.

    This is the current display update function
    void setup() {
    SPI.beginTransaction(SPISettings(10000000, LSBFIRST, SPI_MODE0));

    Code:
    // Sends every display line whose contents changed since the previous call
    // to the display over SPI (blocking). Does nothing when no byte differs
    // between screenData and screenDataOld.
    void drawScreen() {
      uint8_t rowPacket[52];        // 50 pixel bytes + 1 trailer byte are used
      uint8_t rowDirty[240] = {0};  // 1 = this line differs from what was last sent
      bool anyDirty = false;

      // Find the changed lines and remember the new contents as "sent".
      for (int row = 0; row < 240; ++row) {
        for (int col = 0; col < 50; ++col) {
          if (screenData[col][row] == screenDataOld[col][row]) continue;
          rowDirty[row] = 1;
          anyDirty = true;
          screenDataOld[col][row] = screenData[col][row];
        }
      }

      if (!anyDirty) return;

      digitalWrite(CS, HIGH);       // start screen update
      SPI.transfer(0x01 | Vcom);    // command byte

      for (int row = 0; row < 240; ++row) {
        if (rowDirty[row] != 1) continue;

        SPI.transfer(row);          // line number, sent as its own transfer
        for (int col = 0; col < 50; ++col) {
          rowPacket[col] = flipByte(screenData[col][row]);
        }
        rowPacket[50] = 0x00;       // trailer for this line
        SPI.transfer(rowPacket, 51);
      }

      SPI.transfer(0x00);           // trailer for the whole screen
      digitalWrite(CS, LOW);
    }
    I am hoping I could manage the SPI.transfer(lineData, 51); using DMA.

  2. #2
    Senior Member+ KurtE's Avatar
    Join Date
    Jan 2014
    Posts
    9,219
    Look at one of the display drivers for teensy, like ili9341_t3n st7735_t3 ili9488_t3. And look at updatescreenasync...

  3. #3
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    Thanks KurtE, I found this, but not sure if looking the right place

    Code:
    	//==========================================
    	// T4
    	//==========================================
    	#if defined(ENABLE_EXT_DMA_UPDATES)
    	// BUGBUG try first not worry about continueous or not.
      	// Start off remove disable on completion from both...
    	// it will be the ISR that disables it... 
    	if ((uint32_t)_pfbtft >= 0x20200000u)  arm_dcache_flush(_pfbtft, CBALLOC);
    
    	_dmasettings[4].TCD->CSR &= ~( DMA_TCD_CSR_DREQ);
    	setAddr(0, 0, _width-1, _height-1);
    	writecommand_last(ILI9488_RAMWR);
    	_spi_fcr_save = _pimxrt_spi->FCR;	// remember the FCR
    	_pimxrt_spi->FCR = 0;	// clear water marks... 	
    	maybeUpdateTCR(_tcr_dc_not_assert | LPSPI_TCR_FRAMESZ(23) | LPSPI_TCR_RXMSK /*| LPSPI_TCR_CONT*/);
     	_pimxrt_spi->DER = LPSPI_DER_TDDE;
    	_pimxrt_spi->SR = 0x3f00;	// clear out all of the other status...
    
      	//_dmatx.triggerAtHardwareEvent(DMAMUX_SOURCE_LPSPI4_TX );
    
     	_dmatx = _dmasettings[0];
    
      	_dmatx.begin(false);
      	_dmatx.enable();
    
    	_dmaActiveDisplay[_spi_num]  = this;
    	if (update_cont) {
    		_dma_state |= ILI9488_DMA_CONT;
    	} else {
    		_dma_state &= ~ILI9488_DMA_CONT;
    		_dmasettings[4].disableOnCompletion();
    	}
    
    	_dma_state |= ILI9488_DMA_ACTIVE;
    	#else
    	//====================================
    	setAddr(0, 0, _width-1, _height-1);
    	fillDMApixelBuffer(_dma_pixel_buffer0);  // Fill the first buffer
    	writecommand_last(ILI9488_RAMWR);
    
    	// Update TCR to 16 bit mode. and output the first entry.
    	_spi_fcr_save = _pimxrt_spi->FCR;	// remember the FCR
    	_pimxrt_spi->FCR = 0;	// clear water marks... 	
    	maybeUpdateTCR(_tcr_dc_not_assert | LPSPI_TCR_FRAMESZ(23) | LPSPI_TCR_RXMSK /*| LPSPI_TCR_CONT*/);
    //	_pimxrt_spi->CFGR1 |= LPSPI_CFGR1_NOSTALL;
    //	maybeUpdateTCR(_tcr_dc_not_assert | LPSPI_TCR_FRAMESZ(15) | LPSPI_TCR_CONT);
     	_pimxrt_spi->DER = LPSPI_DER_TDDE;
    	_pimxrt_spi->SR = 0x3f00;	// clear out all of the other status...
    
      	//_dmatx.triggerAtHardwareEvent(DMAMUX_SOURCE_LPSPI4_TX );
    
     	_dmatx = _dmasettings[0];
    
      	_dmatx.begin(false);
      	_dmatx.enable();
    	fillDMApixelBuffer(_dma_pixel_buffer1); 	// fill the second one
    
    	_dmaActiveDisplay[_spi_num]  = this;
    	if (update_cont) {
    		_dma_state |= ILI9488_DMA_CONT;
    	} else {
    		_dma_state &= ~ILI9488_DMA_CONT;
    
    	}
    
    	_dma_state |= ILI9488_DMA_ACTIVE;
    	#endif
    and

    Code:
    void	ILI9488_t3::initDMASettings(void) 
    {
    	// Serial.printf("initDMASettings called %d\n", _dma_state);
    	if (_dma_state) {  // should test for init, but...
    		return;	// we already init this. 
    	}
    
    	// T3.6 and T4... 
    #if defined(__IMXRT1052__) || defined(__IMXRT1062__)  // Teensy 4.x
    	#if defined(ENABLE_EXT_DMA_UPDATES)
    	//=============================================
    	// Storing data in 32 bits so can do direct DMAsss
    	//=============================================
    	// 320*480/5 = 30720
    	_dmasettings[0].sourceBuffer(_pfbtft, (COUNT_WORDS_WRITE)*4);
    	_dmasettings[0].destination(_pimxrt_spi->TDR);
    	// _dmasettings[0].TCD->ATTR_DST = 1;  // Will do 32 bit writes
    	_dmasettings[0].replaceSettingsOnCompletion(_dmasettings[1]);
    
    	_dmasettings[1].sourceBuffer(&_pfbtft[COUNT_WORDS_WRITE], COUNT_WORDS_WRITE*4);
    	_dmasettings[1].destination(_pimxrt_spi->TDR);
    	//_dmasettings[1].TCD->ATTR_DST = 1;
    	_dmasettings[1].replaceSettingsOnCompletion(_dmasettings[2]);
    
    	_dmasettings[2].sourceBuffer(&_pfbtft[COUNT_WORDS_WRITE*2], COUNT_WORDS_WRITE*4);
    	_dmasettings[2].destination(_pimxrt_spi->TDR);
    	//_dmasettings[2].TCD->ATTR_DST = 1;
    	_dmasettings[2].replaceSettingsOnCompletion(_dmasettings[3]);
    	if (_frame_callback_on_HalfDone) _dmasettings[2].interruptAtHalf();
    	else  _dmasettings[2].TCD->CSR &= ~DMA_TCD_CSR_INTHALF;
    	_dmasettings[3].sourceBuffer(&_pfbtft[COUNT_WORDS_WRITE*3], COUNT_WORDS_WRITE*4);
    	_dmasettings[3].destination(_pimxrt_spi->TDR);
    	//_dmasettings[3].TCD->ATTR_DST = 1;
    	_dmasettings[3].replaceSettingsOnCompletion(_dmasettings[4]);
    
    	_dmasettings[4].sourceBuffer(&_pfbtft[COUNT_WORDS_WRITE*4], COUNT_WORDS_WRITE*4);
    	_dmasettings[4].destination(_pimxrt_spi->TDR);
    	//_dmasettings[4].TCD->ATTR_DST = 1;
    	_dmasettings[4].replaceSettingsOnCompletion(_dmasettings[0]);
    	_dmasettings[4].interruptAtCompletion();
    
    
    	//============================================
    	#else
    	// Now lets setup DMA access to this memory... 
    	// Try to do like T3.6 except not kludge for first word...
    	// Serial.println("DMA initDMASettings - before settings");
    	// Serial.printf("  CWW: %d %d %d\n", CBALLOC, SCREEN_DMA_NUM_SETTINGS, COUNT_WORDS_WRITE);
    	_dmasettings[0].sourceBuffer(_dma_pixel_buffer0, sizeof(_dma_pixel_buffer0));
    	_dmasettings[0].destination(_pimxrt_spi->TDR);
    //	_dmasettings[0].TCD->ATTR_DST = 0;		// This should be 2 (32 bit)
    	_dmasettings[0].replaceSettingsOnCompletion(_dmasettings[1]);
    	_dmasettings[0].interruptAtCompletion();
    
    	_dmasettings[1].sourceBuffer(_dma_pixel_buffer1, sizeof(_dma_pixel_buffer1));
    	_dmasettings[1].destination(_pimxrt_spi->TDR);
    //	_dmasettings[1].TCD->ATTR_DST = 0;
    	_dmasettings[1].replaceSettingsOnCompletion(_dmasettings[0]);
    	_dmasettings[1].interruptAtCompletion();
    
    	#endif
    	// Setup DMA main object
    	//Serial.println("Setup _dmatx");
    	// Serial.println("DMA initDMASettings - before dmatx");
    	_dmatx.begin(true);
    	_dmatx.triggerAtHardwareEvent(_spi_hardware->tx_dma_channel);
    	_dmatx = _dmasettings[0];
    	if (_spi_num == 0) _dmatx.attachInterrupt(dmaInterrupt);
    	else if (_spi_num == 1) _dmatx.attachInterrupt(dmaInterrupt1);
    	else _dmatx.attachInterrupt(dmaInterrupt2);
    #elif defined(__MK66FX1M0__) 
    	_dmasettings[0].sourceBuffer(&_dma_pixel_buffer0[3], sizeof(_dma_pixel_buffer0)-3);
    	_dmasettings[0].destination(_pkinetisk_spi->PUSHR);
    	_dmasettings[0].TCD->ATTR_DST = 0;
    	_dmasettings[0].replaceSettingsOnCompletion(_dmasettings[1]);
    	_dmasettings[0].interruptAtCompletion();
    
    	_dmasettings[1].sourceBuffer(_dma_pixel_buffer1, sizeof(_dma_pixel_buffer1));
    	_dmasettings[1].destination(_pkinetisk_spi->PUSHR);
    	_dmasettings[1].TCD->ATTR_DST = 0;
    	_dmasettings[1].replaceSettingsOnCompletion(_dmasettings[2]);
    	_dmasettings[1].interruptAtCompletion();
    
    	_dmasettings[2].sourceBuffer(_dma_pixel_buffer0, sizeof(_dma_pixel_buffer0));
    	_dmasettings[2].destination(_pkinetisk_spi->PUSHR);
    	_dmasettings[2].TCD->ATTR_DST = 0;
    	_dmasettings[2].replaceSettingsOnCompletion(_dmasettings[1]);
    	_dmasettings[2].interruptAtCompletion();
    	// Setup DMA main object
    	//Serial.println("Setup _dmatx");
    	// Serial.println("DMA initDMASettings - before dmatx");
    	_dmatx.begin(true);
    	_dmatx.triggerAtHardwareEvent(_spi_hardware->tx_dma_channel);
    	_dmatx = _dmasettings[0];
    	_dmatx.attachInterrupt(dmaInterrupt);
    There seems to be so much going on that it is too much for me to separate the relevant. By this it seems to me there is no DMA SPI support but it can be hacked to support DMA.

    What I would need is a 51 byte SPI transfer using DMA. I was hoping there would be simpler example more focus to that task.Not all the 16 and 32 bit writes and I do not know what is going on that code.

    I am looking for a simple example that would do x number of bytes SPI DMA sending with Teensy 4.0, from that I think I could derive what is needed.

    There are some examples I can find that look relevant, but not for Teensy 4.0; they use the DmaSpi library, and even those threads are mostly about non-working code and why it is so slow, not about the solution.

    Ideally I am looking for SPI.transferDMA(lineData, 51); and it should return event when ready. (my current thinking anyway how to approach it)

    Absolutely ideally it would be sending all the lines that need update on SPI.transferDMA(screenData, xBytes); but it seems the display requires some delay between line number and line data.
    Last edited by Garug; 05-18-2021 at 05:47 AM.

  4. #4
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    Actually the line number problem was problem on my code. I got it now updating whole screen with a single SPI transfer.

    So the challenge would be to transfer this using DMA SPI.transfer(SPIdata, pixCount); The pixCount is 53 to 12482 depending how many picture lines where updated (or 0 if none). After transfer it should notify the picture was updated.

    the SPI is now initialised using SPI.beginTransaction(SPISettings(10000000, MSBFIRST, SPI_MODE0));
    and (Vcom = Vcom ? 0x00 : 0x40) need to be flipped and send with command (SPIdata[0] = 0x80 | Vcom ) occasionally even if there is no updates.

    but other than that it is just setting the pixels in screenData[x][y] and calling drawScreen(). Note though for x a byte is 8 pixels.

    Code:
    // Builds one SPI packet containing every display line that changed since
    // the previous call and sends it with a single blocking SPI.transfer().
    // Packet layout: command, then per changed line [line number, 50 pixel
    // bytes, 0x00 trailer], then one final 0x00 trailer for the screen.
    // Does nothing when no byte differs between screenData and screenDataOld.
    void drawScreen() {

      // Worst case: 1 command + 240 * (1 + 50 + 1) + 1 screen trailer = 12482.
      uint8_t  SPIdata[12482];
      uint16_t pixCount;
      uint8_t  updatedLines[240];
      uint8_t  updates;

      // Update only lines that have changes.
      updates = 0;
      for (int y = 0; y < 240; y++)  { updatedLines[y] = 0; }
      for (int y = 0; y < 240; y++)  {
        for (int x = 0; x < 50; x++) {
          if( screenData[x][y] != screenDataOld[x][y] ) {
            updatedLines[y] = 1;
            updates = 1;
            screenDataOld[x][y] = screenData[x][y];
          }
        }
      }

      if( updates == 1 ){

        SPIdata[0] = 0x80 | Vcom; // screen update command
        pixCount = 1;

        for (int y = 0; y < 240; y++)  {
          if( updatedLines[y] != 1 ) continue;

          SPIdata[pixCount++] = flipByte(y);           // line number
          for (int x = 0; x < 50; x++) {
            SPIdata[pixCount++] = screenData[x][y];    // line pixels
          }
          // Was "== 0x00" (a comparison), which left this byte uninitialised.
          SPIdata[pixCount++] = 0x00;                  // trailer for line
        }

        // Written once after the last line and counted in pixCount, so the
        // screen trailer is actually transmitted.
        SPIdata[pixCount++] = 0x00;                    // trailer for screen

        digitalWriteFast(CS,HIGH); // start screen update

        SPI.transfer(SPIdata, pixCount);

        digitalWriteFast(CS,LOW);  // end screen update

      }
    }
    Here is the Sharp display connection to Teensy 4.0 and previous sample codes https://forum.pjrc.com/threads/23852...ighlight=Sharp
    Last edited by Garug; 05-18-2021 at 07:51 AM.

  5. #5
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    happy to report that with help of this https://forum.pjrc.com/threads/58234...-Audio-library I got the SPI DMA working.

    The funny thing is that the actual screen update rates are now actually faster with 2 MHz SPI clock than with 10 MHz, so I think this method has some problem with speed, but very good now and will still see if get it optimised.

    on definitions I have now added

    //DMA SPI stuff
    volatile bool dmaBusy = false;
    EventResponder callbackHandler;
    SPISettings memSettings(2000000, MSBFIRST, SPI_MODE0);


    on setup

    void setup() {

    pinMode (CS, OUTPUT);
    SPI.begin();
    // Setup the SPI DMA callback
    callbackHandler.attachImmediate(&callback);

    On loop, making sure DMA is not busy before requesting screen update.

    if(dmaBusy == false ) drawScreen();

    and the screen update routine looks like this

    Code:
    void drawScreen() {
      
      uint8_t  lineData[52];
      uint8_t  SPIdata[12482];
      uint16_t pixCount;
      uint8_t  updatedLines[240];
      uint8_t  updates;
      
       //update only lines that have updates.
       updates = 0;
      for (int y = 0; y < 240; y++)  { updatedLines[y] = 0; }
      for (int y = 0; y < 240; y++)  { for (int x = 0; x < 50; x++) {
          if( screenData[x][y] != screenDataOld[x][y]) {
            updatedLines[y] = 1;  updates = 1; 
            screenDataOld[x][y] = screenData[x][y]; 
             } 
      }} 
    
      if( updates == 1 ){
    
      
      SPIdata[0] = 0x80 | Vcom;
      pixCount = 1;
      
      for (int y = 0; y < 240; y++)  { 
        
      if( updatedLines[y] == 1 )   {  
      for (int x = 0; x < 52; x++) {
        if( x == 0 )          SPIdata[pixCount] = flipByte(y);       //line number
        if( x > 0 && x < 51 ) SPIdata[pixCount] = screenData[x-1][y];//line pixels
        if( x == 51 )         SPIdata[pixCount] == 0x00;             //Trailer for line
        pixCount++;
       } 
       }
       SPIdata[pixCount] = 0x00;//Trailer for screen
      
      }
    
      dmaBusy = true;
      digitalWriteFast(CS,HIGH); //start screen update
      SPI.beginTransaction(memSettings);
      SPI.transfer((void *)SPIdata, nullptr, pixCount, callbackHandler);
      //callback() will stop the screen update
    }
    }
    
    // Completion handler for the asynchronous SPI transfer started in
    // drawScreen(); it is registered in setup() with
    // callbackHandler.attachImmediate(&callback).
    // eventResponder is not used by this handler.
    void callback(EventResponderRef eventResponder)
    {
      //end screen update
      SPI.endTransaction();      // closes the transaction opened in drawScreen()
      digitalWriteFast(CS,LOW);  
      dmaBusy = false;           // loop() only calls drawScreen() while this is false
    }
    Last edited by Garug; 05-18-2021 at 11:25 AM.

  6. #6
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    What I do not understand is the

    SPI.transfer((void *)SPIdata, nullptr, pixCount, callbackHandler);

    this seems to work as well

    SPI.transfer(SPIdata, 0, pixCount, callbackHandler);

    But where is this documented?

  7. #7
    Senior Member+ KurtE's Avatar
    Join Date
    Jan 2014
    Posts
    9,219
    Glad you have stuff working.

    Sorry I did not respond earlier, but it is only around 5am here right now... And sorry my first response yesterday was a bit brief... I was on tablet and don't like typing much on tablets...

    As for nullptr versus 0... they more or less equivalent. https://en.cppreference.com/w/cpp/language/nullptr
    That is you could pass in nullptr, or NULL or 0.
    casting to (void *): I am not the total GCC expert, but as the methods first parameter is declared as: const void *
    I believe you can pass a pointer to anything, without having to cast.

    Code:
    There seems to be so much going on that it is too much for me to separate the relevant. By this it seems to me there is no DMA SPI support but it can be hacked to support DMA.
    Not sure what you mean. But there are layers of support for DMA in the system and processor: Chapters 4-6 in Reference Manual discuss DMA.
    Which is to say that it is reasonably complicated.

    Levels of support in system: SPI transfer as you are now using

    If you look in the cores, in your case cores\teensy4, you will find two files, DMAChannel.h and DMAChannel.cpp, which help you set up a DMA transfer.
    This helps you setup the DMAChannel or channels that you use to do a transfer. SPI transfers may use two one to go out to MOSI and one to come in from MISO.

    These structures, setup the Source and Destinations and there are lots of options on how each of these work. Example if you are doing output to a register, you probably don't want the address to be updated after each transfer. But if your output is to a memory buffer, you probably do want it incremented... Likewise do you want that address to loop back to start at the end? .. Dito for the sources. The structure also has counts/sizes..
    And also has options for what to do when you get to the end of the transfer. Do you wish for it to stop? Do you wish for an Interrupt? Also you have option to interrupt when half completed...

    But there is another issue you can run into, and that is most of these transfers are restricted to 32767 units transferred... Which for example for the ILI9341_t3n stuff where a screen is 320*240 does not fit in 32767.
    There are several ways to handle, like interrupt at completion and setup to do next set of bytes. But there is another method, where you can setup a chain of these setups
    using the data structure (from same file) DMASetting which has same base class... And as I mentioned you have options on what to do when you complete the transfer data in DMAChannel. One option is the replaceSettingsOnCompletion method which sets up to copy another dmasetting into the dmachannel and continue...

    That is what you were seeing in the section:
    Code:
    	// 320*480/5 = 30720
    	_dmasettings[0].sourceBuffer(_pfbtft, (COUNT_WORDS_WRITE)*4);
    	_dmasettings[0].destination(_pimxrt_spi->TDR);
    	// _dmasettings[0].TCD->ATTR_DST = 1;  // Will do 32 bit writes
    	_dmasettings[0].replaceSettingsOnCompletion(_dmasettings[1]);
    
    	_dmasettings[1].sourceBuffer(&_pfbtft[COUNT_WORDS_WRITE], COUNT_WORDS_WRITE*4);
    	_dmasettings[1].destination(_pimxrt_spi->TDR);
    	//_dmasettings[1].TCD->ATTR_DST = 1;
    	_dmasettings[1].replaceSettingsOnCompletion(_dmasettings[2]);
    The sourceBuffer(...) says that the source is a buffer so increments, and this is the funky ILI9488_t3 doing outputs where pixels stored in 32 bits ... More if you really want to know but simply put that display does not support 16 bit pixels, it supports 18 bits where are actually output as 24 bits...

    The destination call sets it up, not as a buffer so it does not increment and is pointing to the TDR (Transmit Data Register) of the SPI port.
    And again these calls setup the size fields within the structure... Also the replace is setup to carry on... As again 320*480 > 32767... needs like 4.7 of these structures...

    So with this we have now defined what transfers to where, but we have not yet said how is it being driven and if I say to do an interrupt, what code do I call?
    So in other places in the code you see things like:
    Code:
    	_dmatx.begin(true);
    	_dmatx.triggerAtHardwareEvent(_spi_hardware->tx_dma_channel);
    	_dmatx = _dmasettings[0];
    	if (_spi_num == 0) _dmatx.attachInterrupt(dmaInterrupt);
    	else if (_spi_num == 1) _dmatx.attachInterrupt(dmaInterrupt1);
    	else _dmatx.attachInterrupt(dmaInterrupt2);
    The triggerAtHardwareEvent tells who is driving the transfers. The code is setup to be table driven depending on which SPI object is used. The table of valid sources is in the reference manual (section 4.4) and
    is in the imxrt.h header file and may be one like: DMAMUX_SOURCE_LPSPI4_TX

    The attachInterrupt call sets which function is called when a DMA operation completes (or half completes) and the appropriate bit is set... In our case here we set up a different interrupt handler depending on which SPI port the display is on (as we might have more than one display), so we keep a table that says, for example, on SPI1 the C++ object that set up the operation is ...

    Again I don't know if you are wanting this level of details or not.

  8. #8
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    Thanks for the detailed and insight full reply. It is two folded do I really want to get so deep on details, the truth is I do, but I should not as it takes time of the tasks I need to complete.

    There is few details I still need to get into, the example I modified had "while(dmaBusy) { yield(); }" but it just seems to hang there, I am not familiar with the use of the yield(), so should I somehow request the resources? for now I just left it out and seems to work, but not 100% sure do I get the freed resources on the loop.

    Anyway I have now plenty of resources and it updates the screen 150 fps moving and rotating a 30x30 bitmap and updating numbers, texta also moves pixel at the time very smoothly. when doing smaller updates the update rates are 400 + fps. so very good and will likely limit the update rate to 120 fps or so. moving the needed graphics on this display looks really smooth.

    There was maybe a bit more noise on analogue inputs (joystick and potentiometers) after going to DMA, but with some filtering they provide very stabile 10 bit resolution. This is for a camera and gimbal controlling joystick, so still need to add communication and take care latenzy stays low. The big task then still remains, the user interface, but now have all basic elements on place.

    Video of a bit older code version, just a test interface https://vimeo.com/549799124

    BTW, though this display is 400x240 pixels, needs only 12482 bytes.
    Last edited by Garug; 05-18-2021 at 04:18 PM.

  9. #9
    Senior Member+ KurtE's Avatar
    Join Date
    Jan 2014
    Posts
    9,219
    The main thing to check with things like:
    while(dmaBusy) { yield(); }"

    Is I assume your eventhandler function is clearing this field.
    is to make sure that dmaBusy is declared as volatile such that the compiler knows to reload it to do the tests.

  10. #10
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    I am having now very weird problem. If I define a new array, it seems any array, in this case uint8_t SW[11]; the first time I use like doing SW[10] = 1; on the loop, the screen goes blank, though this array has nothing to do with it. the code still runs.

    if I define the uint8_t SW[11]; on the loop it works. This started now after taking the SPI DMA in use as mentioned above

    Teensyuino 1.53, Teensy 4.0, what is going on?

    There is plenty of memory available

    "Sketch uses 55024 bytes (2%) of program storage space. Maximum is 2031616 bytes.
    Global variables use 111284 bytes (21%) of dynamic memory, leaving 413004 bytes for local variables"

    edit, I realised I did also changes with ADC setup, changed

    analogReadRes(12);
    analogReadAveraging(32);

    to (just copypasted it from somewhere, do not understand the notation adc->adc0->)

    adc->adc0->setAveraging( 32 );
    adc->adc0->setResolution( 12 );
    adc->adc0->setConversionSpeed( ADC_CONVERSION_SPEED::LOW_SPEED );
    adc->adc0->setSamplingSpeed( ADC_SAMPLING_SPEED::LOW_SPEED );

    and on definitions

    #include <ADC.h>
    ADC *adc = new ADC();

    What did I do wrong with that, and why is it affecting the, I assume SPI DMA. After changing it back it seems to work, but still testing.

    Edit2

    No, it does not work: any small change to the code that has nothing to do with the display makes the display go blank, and it also looks like something weird is going on with the ADC. A potentiometer controls an LED's brightness (PWM); it still works after the screen goes blank, but there are sudden jumps. Is this something to do with DMA conflicts? Is the SPI DMA wrongly configured? It is now

    definitions

    //DMA SPI stuff
    volatile bool dmaBusy = false;
    EventResponder callbackHandler;
    SPISettings memSettings(8000000, MSBFIRST, SPI_MODE0);

    Setup

    pinMode (10, OUTPUT);
    SPI.begin();
    // Setup the SPI DMA callback
    callbackHandler.attachImmediate(&callback);

    use

    //start screen update
    dmaBusy = true;
    n=0;
    digitalWriteFast(10,HIGH);
    SPI.beginTransaction(memSettings);
    SPI.transfer(SPIdata, 0, pixCount, callbackHandler);

    return;


    }
    }

    // Completion handler for the asynchronous SPI transfer started in the
    // "use" snippet; registered with callbackHandler.attachImmediate(&callback).
    // eventResponder is not used by this handler.
    void callback(EventResponderRef eventResponder)
    {
    //end screen update
    SPI.endTransaction();       // closes the transaction opened before SPI.transfer()
    digitalWriteFast(10,LOW);   // pin 10 was driven HIGH when the update was started
    dmaBusy = false;            // marks the transfer as finished

    }

    Edit3

    it seems if new variables are defined in the loop it works, but not if they are defined at the start of the code, the before the DMA changes variable definitions on start of the loop are ok, so what the heck is going on. I will test on different environment next.
    Last edited by Garug; 05-19-2021 at 06:10 AM.

  11. #11
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    Do I need to allocate the memory segment for DMA somehow?

  12. #12
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    I see there has been a lot of discussion of this, but still have problems understanding how and where to allocate the

    uint8_t SPIdata[12512] ; //12482 needed, but somewhere said should be maybe multiply of 32

    buffer needed for the display data in DMA memory correctly, and I do not understand why the SPI library is not taking care of that. I still wonder whether this actually is my problem, but it must be memory related, I think, maybe...

  13. #13
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    So many post to read, but so difficult to find the needed information. here is good discussion about the memory https://forum.pjrc.com/threads/57326...-regions/page1

    Kurt mentions there (from 2019) "b1) Unless you edit startup.c - All of this memory is using memory cache, which causes several issues with using DMA... In many cases you can use functions like: arm_dcache_flush and arm_dcache_delete to get DMA to mostly work. We need some documentation on how/when we should use these functions."

    but have not found instructions yet, is there, where could they be found?

    Still reading, Paul mentions

    "However, I do not believe any amount of documentation is going to make DMA easily accessible to most people. It's an advanced topic. Using DMA successfully (without extreme luck) requires tough troubleshooting."

    So maybe I should have seen that before trying to do 13kb SPI DMA transfer... But I do not really agree on that. To me it seems that the basics are pretty simple stuff, the problems come when things do not work out as they should. I mean why should SPI DMA transfer cause this kind memory problems, why not provide simple way to tell where to allocate the DMA buffer if that can not be automatically done, and probably there is simple way to do that memory allocation, I just have not figured it out yet, as have not found to me understandaple documentation or example how to do that.
    Last edited by Garug; 05-19-2021 at 12:47 PM.

  14. #14
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    It is wery basic questions I have at this time

    - what defines the SPI DMA cache, how do I access it

    - is there a way to allocate that to safe place, I have plenty of memory available and the transfer requires only 13 kb

    - How and where should I use

    // arm_dcache_flush(0x20200000u, 12512);
    // arm_dcache_delete(0x20200000u, 12512);

    This obviously did not work; I do not know in which memory region the SPI DMA cache is located or how to find it. I guess DMAMEM is just a confusing acronym, or is the DMA actually using it?

    I rather would not change startup.c or SPI.c or any other standard libraries.

    I do use a lot of global variables and like to keep doing that. Right now it seems getting rid of global variables could kind of solve the problem. The strange thing is the old global variables work just fine, but the new ones not, but new local variables seem to be ok. Thye are on DTCM, so I guess the SPI DMA memory allocation is not there.

    It was a long monologue and frustrating day, I do not feel really I learned anything except there seems to be serious problems with T4 SPI DMA and memory management. Tomorrow if there is some pointers will try those, but othervice need to go back to before SPI DMA trials code and get things done.

    Edit

    I still tried reallocating the global variables using DMAMEM and FASTRUN, FLASHMEM and got things working with few new global variables. So I guess that made by accident a free whole for the evil SPI DMA memory allocation, but it feels like it breaks at any time when adding a new global variable. I got the Fonts corrupted when on DMAMEM so maybe the SPI DMA is there after all? But there is couple of bigger arrays that hang the code if put on FASTRUN, but work when on DMAMEM, so I am confused. Why is the SPI DMA spoking all around the memory?

    uint8_t DMAMEM screenData[50][240];//50 holds 8 x 50 = 400 B/W pixels
    uint8_t DMAMEM screenDataOld[50][240];

    There must be a way to tell for the SPI DMA what memory it can use, right? but where can I find the instructions?

    Overall there is plenty of memory

    Sketch uses 68624 bytes (3%) of program storage space. Maximum is 2031616 bytes.
    Global variables use 78516 bytes (14%) of dynamic memory, leaving 445772 bytes for local variables. Maximum is 524288 bytes.
    Last edited by Garug; 05-19-2021 at 02:11 PM.

  15. #15
    Senior Member+ KurtE's Avatar
    Join Date
    Jan 2014
    Posts
    9,219
    Sorry I know that DMA can be a total pain in the ... I feel your pain. When I first started doing some stuff with DMA first with some of these display drivers, I found every chip was different, sometimes in very subtle ways. The biggest pain was T3.5... But that is outside of this conversation.

    I will try to mention a few things here, hopefully will help some. Sorry in advance, I will probably miss some of the questions.

    SPI DMA Cache.... There is no such thing... There is memory caching for different regions of memory on the T4.x processors.
    The first main region of memory that contains the ITCM (most of your code) and DTCM (most of your data) is tightly coupled and does not use the cache.
    Most other memory regions are configured to use the hardware cache. This is setup and configured in startup.c in the function configure_cache

    So for example if your logical frame buffer is setup like:
    Code:
    // 32-byte aligned buffer; declared without DMAMEM (the original was missing
    // the closing parenthesis of __attribute__).
    uint8_t my_tft_buffer[12512] __attribute__((aligned(32)));
    It is created in DTCM and you don't have cache issue... Note: DMA does like the aligned stuff... That is one way to do so.

    However if you instead do:
    Code:
    // Same frame buffer, but tagged DMAMEM; still aligned to a 32-byte boundary.
    DMAMEM uint8_t my_tft_buffer[12512] __attribute__((aligned(32)));
    This puts the buffer up into the RAM2 (from T4.1 page), this is slower memory than RAM1 and startup.c sets up the cache...
    Likewise if you do malloc it also allocates memory from this region.

    The SPI library code that does DMA checks the addresses of the buffers, and if a buffer is in cached memory it calls arm_dcache_flush (for the buffer it is going to output from) to make sure that anything you last wrote to that buffer has actually been written out from the cache to physical memory. Likewise, if you ask it to return data, it checks whether your receive buffer is up high (in cached memory) and then calls arm_dcache_delete on that region, so that any reads after the DMA operation completes have to come from physical memory...

    You would never hard code things like:
    // arm_dcache_flush(0x20200000u, 12512);
    // arm_dcache_delete(0x20200000u, 12512);
    But instead you pass in the address of your actual buffer. As that is the region of memory you wish to flush... Again unless you use normal DTCM memory in which case you simply ignore this as there is no cache.

    As for global variables versus local versus malloc/new... I personally prefer not to have too many globals, but instead have classes, which have their data localized as to not run into places where I unintentionally reused a global variable like buffer and run into issues. But again this still does not fully specify where these are allocated. That is you can still have global objects, or local objects... But again sort of outside the
    realm of this...

    The problem with using too many local variables/buffers is that as you call things with local variables, the stack grows down, and if you use too much it can grow down far enough that its addresses run into the
    addresses of your global variables and corrupt them... Which can be a real pain...

    With a few programs, I have hacked in code to try to figure out just how much stack space had been used, to try to detect this, as I have run into it.

    Again not sure if this answered any of your questions... Hope so.

  16. #16
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    Thanks Kurt,

    as I edited above got it kind of working using the memory allocations, but really do not understand that SPI DMA seems to act both on DMAMEM and FASTRUN, and really feel the SPI DMA implementation is badly flawed. Why not just simply tell for it exactly where it can use memory and how much.

    One big reason I use a lot of globals is that I believe things stay in place once they are defined like that, but then some SPI DMA comes poking around... And also simply because I am not very skilled at coding and like to keep everything in one .ino. I write a lot of functions, but hate pointers.

    edit

    the thing I wonder really how I got it working with plenty of global variables but adding one new breaks it, does that have something to do with the compiler how it allocates the memory for the variables? and it remembers the old allocations? does the Arduino always completely recompile, or is there way to force it to do so.
    Last edited by Garug; 05-19-2021 at 02:43 PM.

  17. #17
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    Just to confirm, when the frame buffer is defined as

    uint8_t FASTRUN txBuffer[12512] __attribute__((aligned(32)));

    there is no reason to use arm_dcache_flush(); or arm_dcache_delete();

    and if I would use them it would be arm_dcache_flush(txBuffer, 12512);

    So the SPI DMA only uses the txBuffer[] when defined FASTRUN, but why is there then the problems? txBuffer[] is just a normal array that normally would not cause any problems.

    The FASTRUN I defined for it just a moment a go, so will test now if that removes the problems, but if so the solution would be pretty simple, just defining the txBuffer[] as global with FASTRUN. I will report back tomorrow after testing more.

  18. #18
    Senior Member PaulStoffregen's Avatar
    Join Date
    Nov 2012
    Posts
    24,496
    You probably don't want to use FASTRUN on variables. It's meant for functions. They go into ITCM which is optimized for code, not data.

  19. #19
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    But how do I make the SPI DMA memory stuff then act nicely? Now it is completely unmanageable and unpredictable. It works by luck, randomly experimenting with allocating global variables using FASTRUN, DMAMEM, and FLASHMEM.

    About the DMAMEM, does it have anything to do with DMA transfers?

    I have plenty of memory for this application; how should it be allocated so that it does not conflict with SPI DMA?
    Last edited by Garug; 05-19-2021 at 03:12 PM.

  20. #20
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    I did get it working by removing the memory allocation attributes from all globals other than these

    uint8_t DMAMEM screenData[50][240];//50 holds 8 x 50 = 400 B/W pixels
    uint8_t DMAMEM screenDataOld[50][240];
    uint8_t DMAMEM txBuffer[12512] __attribute__((aligned(32)));

    If DMAMEM is removed from one or both of the first two, the screen is not updated; the code runs, but there seems to be some discontinuity with the ADC or PWM (a potentiometer adjusts the LED brightness). If one of the bitmaps or fonts (also bitmaps) is defined with DMAMEM, it works but is corrupted.

    Strangely this works too

    uint8_t DMAMEM screenData[50][240];//50 holds 8 x 50 = 400 B/W pixels
    uint8_t DMAMEM screenDataOld[50][240];
    uint8_t txBuffer[12512] __attribute__((aligned(32)));
    Last edited by Garug; 05-19-2021 at 03:59 PM.

  21. #21
    Senior Member+ KurtE's Avatar
    Join Date
    Jan 2014
    Posts
    9,219
    As I was trying to say there really is not a concept of SPI DMA memory.

    DMA in simple terms simply copies something from one address to another address. Again there are options then on if the address of either source and/or destination changes after each transfer or not.

    The section in the T4.1 memory area: https://www.pjrc.com/store/teensy41.html#memory

    Again there are keywords shown in the memory table: Extract:
    Click image for larger version. 

Name:	screenshot.jpg 
Views:	8 
Size:	52.7 KB 
ID:	24824
    From that diagram, you see:
    DMAMEM simply says to allocate memory out of RAM2
    FASTRUN simply means code that is copied down into ITCM, which by default is more or less all code

    FLASHMEM code is code that is left in FLASH
    PROGMEM is variables that stay up in FLASH - You can not write to these variables usually large read only tables

    As shown, global variables (both the initialized ones and the uninitialized, zeroed ones) live in RAM1, and as I mentioned, the local variables (stack) are also in that region and grow down.

    RAM2 as shown: all variables marked as DMAMEM have addresses in this region, these variables are not initialized. The areas above these variables are used for the heap (that is used by malloc and new)

    What this table does not show, is the other regions of memory that are used by the chip. That is everything on these boards are addressed by one memory space. This includes things like the registers associated with different hardware components like SPI. For example if you are using the SPI object it is actually using the hardware object LPSPI4 whose registers start at address hex 403A_0000h

    The only complication here is that if the memory you are trying to output to SPI is, for example, in RAM2 (malloc or DMAMEM),
    the physical memory may not reflect the last things you thought you wrote out to it...

    That is for example if you had:
    DMAMEM uint8_t my_buffer[1000];
    memset (my_buffer, 0xff, sizeof(my_buffer));
    Do DMA operation from my_buffer to someplace like LPSPI4.TDR , the data that goes out may not be all 0xff as the writes to the memory go through the cache and it is up to the cache to decide
    when to write the data out to underlying memory. And the DMA operations work directly from the memory without any knowledge of the cache.
    That is why the underlying code like in SPI.transfer(.._) does a flush to force the cache to write its contents out to the underlying storage.

  22. #22
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    This is the current sending routine. First the txBuffer is built, and it is sent immediately after. So are you saying there should be a delay before sending, so that the txBuffer is completely written to memory before sending? I could separate these and do something else for a short time before sending.

    if( updatedLines[y] == 1 ) {
    for (int x = 0; x < 52; x++) {
    if( x == 0 ) txBuffer[pixCount] = flipByte(y); //line number
    if( x > 0 && x < 51 ) txBuffer[pixCount] = screenData[x-1][y];//line pixels
    if( x == 51 ) txBuffer[pixCount] = 0x00; //Trailer for line
    pixCount++;
    }
    }
    txBuffer[pixCount] = 0x00;//Trailer for screen

    }

    //start screen update
    dmaBusy = true;

    //mem0DmaWrite(txBuffer, pixCount);
    digitalWriteFast(10,HIGH);
    SPI.beginTransaction(memSettings);
    //SPI.transfer(txBuffer, 0, pixCount, callbackHandler);
    SPI.transfer((void *)txBuffer, nullptr, pixCount, callbackHandler);

    }
    }


    // Completion handler passed (via callbackHandler) to the asynchronous
    // SPI.transfer() call above. It undoes what the sending code set up:
    // closes the SPI transaction, drives pin 10 LOW again and clears dmaBusy.
    // The eventResponder argument is not used.
    void callback(EventResponderRef eventResponder)
    {
    //end screen update
    SPI.endTransaction();
    digitalWriteFast(10,LOW);
    // Clearing the flag last: only now may the next transfer be started.
    dmaBusy = false;

    }
    But this does not seem to be the problem. The problem seems to be that for some reason SPI DMA interacts with other variables, like the bitmaps that get corrupted and this bitmap corruption problem can be solved with no other code changes than allocating the bitmap array elsewhere on memory by trial.

    I did read the whole memory thread https://forum.pjrc.com/threads/57326...-regions/page1 and the https://www.pjrc.com/store/teensy41.html#memory but still have no understanding what the problem is and how to solve it, other than reallocating the global variables by trial and error.

    I just simply do not understand how it is considered normal that SPI DMA functionality writes all over the memory. Or why would the bitmaps get corrupted just because they are on wrong memory area, BTW I had the bitmaps also on FLASHMEM and that worked fine with some combinations of memory definitions. The bigger problem is that something else gets corrupted also on some memory allocations and that stops the screen update and causes some problems on ADC or PWM or maybe something else.

    I am too tired now and keep repeating, but just do not understand what the problem is and how to fix it, other than the trial and error and being afraid it breaks again.

  23. #23
    Senior Member+ KurtE's Avatar
    Join Date
    Jan 2014
    Posts
    9,219
    Sorry, but it is hard to tell you what is going with little bits and pieces of code, which don't show things like how things are defined and the like.

    Also helps to use the code markers (#) instead of quote as to hopefully preserve the line spacing.

    Again things from previous posting:
    Code:
    // Pushes every changed display line to the display in one blocking SPI
    // transfer.
    //
    // Compares screenData against screenDataOld, marks each of the 240 lines
    // that differs and refreshes screenDataOld. If at least one line changed,
    // a packet is assembled in a local (stack) buffer:
    //   [0x80 | Vcom] { [flipByte(line)] [50 pixel bytes] [0x00] }... [0x00]
    // and sent with CS held HIGH for the duration of the transfer.
    //
    // Reads the globals screenData, Vcom and CS; updates screenDataOld.
    void drawScreen() {
      // 1 command byte + 240 lines * 52 bytes + 1 screen trailer byte.
      uint8_t  SPIdata[12482];
      uint16_t pixCount = 0;
      uint8_t  updatedLines[240];
      bool     updates = false;

      // Update only lines that have changed since the previous call.
      for (int y = 0; y < 240; y++) {
        updatedLines[y] = 0;
        for (int x = 0; x < 50; x++) {
          if (screenData[x][y] != screenDataOld[x][y]) {
            updatedLines[y] = 1;
            updates = true;
            screenDataOld[x][y] = screenData[x][y];
          }
        }
      }

      if (!updates) return;

      SPIdata[pixCount++] = 0x80 | Vcom;            // screen update command

      for (int y = 0; y < 240; y++) {
        if (updatedLines[y] != 1) continue;

        SPIdata[pixCount++] = flipByte(y);          // line number
        for (int x = 0; x < 50; x++) {
          SPIdata[pixCount++] = screenData[x][y];   // line pixels
        }
        // Trailer for line. The original used `==` here, which compares
        // instead of assigning and left this byte as stack garbage.
        SPIdata[pixCount++] = 0x00;
      }

      // Trailer for screen. It is now counted in pixCount so that it is
      // actually transmitted; before, it was stored just past the sent range.
      SPIdata[pixCount++] = 0x00;

      digitalWriteFast(CS, HIGH);  // start screen update
      SPI.transfer(SPIdata, pixCount);
      digitalWriteFast(CS, LOW);   // end screen update
    }
    But for example trying to do code above using DMA asynchronously. Like simply changing the SPI.transfer here to the one that uses DMA will have issues.
    Why? Because the buffer you are transferring on is from the stack (local variable to this function).
    And as the transfer does not complete before you leave this function, that memory may very easily be mucked with by whatever gets called after this.

    Also I don't know your sketch at all, but that is a pretty good size amount of stack space used by this function.
    So if for example the summary data at the end of your compiles show that what you have left available for local is small.. Example output:
    Code:
    Memory Usage on Teensy 4.1:
      FLASH: code:10124, data:1568, headers:7756   free for files:8107016
       RAM1: variables:8896, code:7584, padding:25184   free for local variables:482624
       RAM2: variables:12384  free for malloc/new:511904
    Then calling this could have your stack go down into memory used for globals and corrupt them... But again don't know your program.

  24. #24
    Senior Member
    Join Date
    Mar 2015
    Posts
    159
    Here is the complete involved subroutines sending the picture.

    The drawScreen() : picture is stored in screenData[x][y] first it is compared to screenDataOld[x][y] to see if there was any changes and the changed lines are stored in updatedLines[240]. This is 400x240 Sharp monochrome display, one byte holds 8 pixels so that is why x goes only to 50.

    If there was updates, the data is put in format required by the display line by line. only the changed lines are sent, lines have line numbers.The lines to be send with SPI DMA are stored in txBuffer[] that is defined as global. The pixCount is the number of bytes in txBuffer[]

    Previously the sending was right after, on this function, but now separated to sendScreen().

    Code:
    // Builds, in the global txBuffer, the SPI packet for every display line
    // that changed since the last call.
    //
    // Compares screenData against screenDataOld, marks each of the 240 lines
    // that differs and refreshes screenDataOld. The packet layout is:
    //   [0x80 | Vcom] { [flipByte(line)] [50 pixel bytes] [0x00] }... [0x00]
    //
    // Returns the number of bytes placed in txBuffer (including the final
    // screen trailer), or 0 when nothing changed and txBuffer was not touched.
    uint16_t drawScreen() {
      uint16_t pixCount = 0;      // stays 0 when there is nothing to send
      uint8_t  updatedLines[240];
      bool     updates = false;

      // Update only lines that have changed since the previous call.
      for (int y = 0; y < 240; y++) {
        updatedLines[y] = 0;
        for (int x = 0; x < 50; x++) {
          if (screenData[x][y] != screenDataOld[x][y]) {
            updatedLines[y] = 1;
            updates = true;
            screenDataOld[x][y] = screenData[x][y];
          }
        }
      }

      // The original returned an uninitialized pixCount on this path.
      if (!updates) return 0;

      txBuffer[pixCount++] = 0x80 | Vcom;           // screen update command

      for (int y = 0; y < 240; y++) {
        if (updatedLines[y] != 1) continue;

        txBuffer[pixCount++] = flipByte(y);         // line number
        for (int x = 0; x < 50; x++) {
          txBuffer[pixCount++] = screenData[x][y];  // line pixels
        }
        txBuffer[pixCount++] = 0x00;                // trailer for line
      }

      // Trailer for screen, written once after the last line and counted in
      // the returned length so that the caller actually transmits it.
      // Worst case: 1 + 240 * 52 + 1 = 12482 bytes.
      txBuffer[pixCount++] = 0x00;

      return pixCount;
    }
    
    void sendScreen(volatile uint8_t *src, size_t count)
    {
      if (!src) { return; }
      
      dmaBusy = true;
      digitalWriteFast(10,HIGH); 
      SPI.beginTransaction(memSettings);
      //SPI.transfer(src, 0, count, callbackHandler);
      SPI.transfer((void *)src, nullptr, count, callbackHandler);
    
    }
    
    // Completion handler for the asynchronous SPI transfer started in
    // sendScreen(); setup() attaches it to callbackHandler. It undoes what
    // sendScreen() set up: closes the SPI transaction, drives pin 10 LOW
    // again and clears dmaBusy. The eventResponder argument is not used.
    void callback(EventResponderRef eventResponder)
    {
      //end screen update
      SPI.endTransaction();
      digitalWriteFast(10,LOW); 
      // Clearing the flag last: loop() starts the next update only once
      // dmaBusy is false.
      dmaBusy = false; 
      
    }
    
    // Sends the display-clear command with a blocking SPI transfer.
    //
    // Drives pin 10 HIGH, clocks out the command byte (0x20 with the current
    // Vcom value OR-ed in) followed by one 0x00 byte at 2 MHz, MSB first,
    // SPI mode 0, then drives pin 10 LOW again.
    void clearDisplay() {
      digitalWriteFast(10, HIGH);  // start of command
      SPI.beginTransaction(SPISettings(2000000, MSBFIRST, SPI_MODE0));
      SPI.transfer(0x20 | Vcom);
      SPI.transfer(0x00);
      // Every beginTransaction() needs a matching endTransaction(); the
      // original left the transaction open.
      SPI.endTransaction();
      digitalWriteFast(10, LOW);   // end of command
    }

    On the main loop I am now testing this

    The screenData[x][y] is build before calling these and it is defined as Global

    the callback(EventResponderRef eventResponder) happens after SPI.transfer((void *)src, nullptr, count, callbackHandler) is ready and sets dmaBusy = false, the condition to start sending new image. When the code ends here the image (screenData[x][y]) is already build.

    Code:
      
    
    void loop() {
      //screen frefress
      Vcom = Vcom ? 0x00 : 0x40;
    
    //building the image and storing it to screenData[x][y] etc.
    
    if(dmaBusy == false ) {
        
        //delayMicroseconds(100);
    
        Pixels = drawScreen();
    
        delayMicroseconds(100);
    
        if(Pixels > 1) sendScreen(txBuffer, Pixels);
    }
    The delays seem to have negative effect, I get occasionally random noise that has previously not appeared. Without delays it works about the same as before this change as it should. I am suspecting the delayMicroseconds() has some problems with SPI DMA?

    The remaining relevant code

    the global variables

    Code:
    // Current image: 50 bytes per line x 240 lines, one bit per pixel
    // (50 x 8 = 400 B/W pixels per line).
    uint8_t  DMAMEM screenData[50][240];//50 holds 8 x 50 = 400 B/W pixels
    // Copy of the image as of the last drawScreen() call; drawScreen()
    // compares against it to find the lines that changed.
    uint8_t  DMAMEM screenDataOld[50][240];
    // Packet buffer filled by drawScreen() and handed to sendScreen() for the
    // asynchronous SPI transfer. Aligned to a 32-byte boundary.
    uint8_t  DMAMEM txBuffer[12512] __attribute__((aligned(32)));  
    
      //DMA SPI stuff
      // true from sendScreen() until callback() runs; loop() builds and
      // sends a new frame only while this is false.
      volatile  bool  dmaBusy = false;
      // Event object passed to the asynchronous SPI.transfer(); setup()
      // attaches callback() to it.
      EventResponder  callbackHandler;
      // SPI settings used by sendScreen(): 8 MHz, MSB first, mode 0.
      SPISettings     memSettings(8000000, MSBFIRST, SPI_MODE0);
    
    
    // Toggled between 0x00 and 0x40 on every pass of loop() and OR-ed into
    // the command byte sent to the display.
    uint8_t   Vcom;
    The setup

    Code:
    void setup() {
    
      pinMode (10, OUTPUT);
      SPI.begin(); 
      // Setup the SPI DMA callback
      callbackHandler.attachImmediate(&callback);
    
      
      clearDisplay();
    That should be the complete screen update related code.

    There is plenty of memory available. the remaining code to be made will consume some, but things can be freely allocated to what ever memory location works best, there is plenty of memory for what is needed, currently used

    Sketch uses 54400 bytes (2%) of program storage space. Maximum is 2031616 bytes.
    Global variables use 86708 bytes (16%) of dynamic memory, leaving 437580 bytes for local variables. Maximum is 524288 bytes.

    Ps. I would do this but it provides just empty screen (code runs though somehow)

    Code:
    uint8_t   screenData[50][240];//50 holds 8 x 50 = 400 B/W pixels
    uint8_t   screenDataOld[50][240];
    uint8_t   txBuffer[12512] __attribute__((aligned(32)));
    this seems to work though

    Code:
    uint8_t  DMAMEM screenData[50][240];//50 holds 8 x 50 = 400 B/W pixels
    uint8_t   screenDataOld[50][240];
    uint8_t   txBuffer[12512] __attribute__((aligned(32)));
    But this does not work

    Code:
    uint8_t    screenData[50][240];//50 holds 8 x 50 = 400 B/W pixels
    uint8_t   DMAMEM screenDataOld[50][240];
    uint8_t   txBuffer[12512] __attribute__((aligned(32)));
    As the memory is in 32kb blocks, could it be if txBuffer[12512] is divided to two blocks that causes problems? how to avoid that? is the __((aligned(32))); supposed to do that?
    Last edited by Garug; 05-19-2021 at 06:28 PM.

  25. #25
    Senior Member+ KurtE's Avatar
    Join Date
    Jan 2014
    Posts
    9,219
    Code:
    // Scans screenData for lines that differ from screenDataOld, copies the
    // changes into screenDataOld, and packs the changed lines into txBuffer
    // as: [0x80 | Vcom] { [flipByte(line)] [50 pixel bytes] [0x00] }...
    // Returns pixCount, the number of bytes written for the command byte and
    // the changed lines. The screen trailer byte is stored at
    // txBuffer[pixCount] but is not included in that count.
    // NOTE: pixCount is assigned only inside the `updates == 1` branch, so
    // when nothing changed the value returned is uninitialized.
    uint16_t drawScreen() {
    
      uint16_t pixCount;
      uint8_t  updatedLines[240];
      uint8_t  updates;
    
      //update only lines that have updates.
      updates = 0;
      for (int y = 0; y < 240; y++)  {
        updatedLines[y] = 0;
      }
      // Mark every line containing at least one changed byte and bring
      // screenDataOld up to date at the same time.
      for (int y = 0; y < 240; y++)  {
        for (int x = 0; x < 50; x++) {
          if ( screenData[x][y] != screenDataOld[x][y]) {
            updatedLines[y] = 1;  updates = 1;
            screenDataOld[x][y] = screenData[x][y];
          }
        }
      }
    
      if ( updates == 1 ) {
    
        // Command byte first; pixCount is the next free index in txBuffer.
        txBuffer[0] = 0x80 | Vcom;
        pixCount = 1;
    
        for (int y = 0; y < 240; y++)  {
    
          if ( updatedLines[y] == 1 )   {
            // 52 bytes per changed line: line number, 50 pixel bytes, trailer.
            for (int x = 0; x < 52; x++) {
              if ( x == 0 )          txBuffer[pixCount] = flipByte(y);      //line number
              if ( x > 0 && x < 51 ) txBuffer[pixCount] = screenData[x - 1][y]; //line pixels
              if ( x == 51 )         txBuffer[pixCount] = 0x00;             //Trailer for line
              pixCount++;
            }
          }
          // Written on every pass of the y loop at the current end of the
          // packet; pixCount is not advanced, so a following line overwrites it.
          txBuffer[pixCount] = 0x00;//Trailer for screen
    
        }
      }
      // Uninitialized here when updates != 1 (see NOTE above).
      return pixCount;
    }
    Sorry I don't have time to go through all of the code, but there may be little things screwing up...

    Like the above. What happens if updates != 1?

    You still return pixcount which is not initialized. My guess is the compiler probably output a warning about this...

    Will try to look again later, but have some coming now

    EDIT: I hacked up dummy sketch with this function and added a few defines
    Code:
    uint8_t   screenData[50][240];//50 holds 8 x 50 = 400 B/W pixels
    uint8_t   screenDataOld[50][240];
    uint8_t   txBuffer[12512] __attribute__((aligned(32)));
    #define Vcom 0x1
    #define flipByte(y) (y)
    Sure enough:
    Code:
    "C:\\arduino-1.8.15\\hardware\\teensy/../tools/arm9/bin/arm-none-eabi-g++" -c -O2 -g -Wall -ffunction-sections -fdata-sections -nostdlib -MMD -std=gnu++14 -fno-exceptions -fpermissive -fno-rtti -fno-threadsafe-statics -felide-constructors -Wno-error=narrowing -mthumb -mcpu=cortex-m7 -mfloat-abi=hard -mfpu=fpv5-d16 -D__IMXRT1062__ -DTEENSYDUINO=154 -DARDUINO=10815 -DARDUINO_TEENSY41 -DF_CPU=600000000 -DUSB_SERIAL -DLAYOUT_US_ENGLISH "-IC:\\Users\\kurte\\AppData\\Local\\Temp\\arduino_build_665463/pch" "-IC:\\arduino-1.8.15\\hardware\\teensy\\avr\\cores\\teensy4" "C:\\Users\\kurte\\AppData\\Local\\Temp\\arduino_build_665463\\sketch\\abc.ino.cpp" -o "C:\\Users\\kurte\\AppData\\Local\\Temp\\arduino_build_665463\\sketch\\abc.ino.cpp.o"
    abc: In function 'uint16_t drawScreen()':
    abc:54: warning: 'pixCount' may be used uninitialized in this function 
       54 |   return pixCount;
          |          ^~~~~~~~
    If you have not already done so, I strongly recommend you turn on output of all warnings. This is set in preferences.

    And look to see what each one is.

    With this one, if you call this function and nothing changed, it will return back what ever garbage that was in the stack at that time at that location...
    Obviously this is simple to fix:
    uint16_t pixCount = 0;
    Last edited by KurtE; 05-19-2021 at 06:52 PM.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •