[posted] 32 Channel Parallel NeoPixel Driver

lorencc

Member
We have been building art cars (mutant vehicles) for Burning Man for many years. Our current one is based on a solar-electrified Worksman side-by-side tricycle. For night driving it is illuminated by 22 pairs of NeoPixel (WS2812) strips for a total of ~6400 RGB LEDs. The strips are in pairs because half of them are always on the other side. :cool: Anyway, many of the dozens of light patterns are animated and there was a strong desire for the animation to be as smooth as possible, which required a high frame rate. The lights are controlled by a Teensy 3.6 running at 256 MHz that drives all 22 signal lines in parallel through some 74HCT245s at the maximum clock rate for the NeoPixels. We can easily achieve a frame rate well over 200 Hz.

The code presented in this post is an illustration of how that works. Here we show a 32-channel example, which, by the way, also runs at the maximum NeoPixel clock rate of ~1 MHz. Total output rate is ~1.3 million RGB pixels per second.

The serialization of the color bits is done in the TRANSPOSE macros using the BFI instruction for the ARM and a word is sent directly to the ports on the chip. The NeoPixel documentation has separate timing rules for sending a zero and a one. We just turn on the line, wait 200 ns, send out the data bit which either turns it off (a zero) or leaves it high (a one), wait ~400 ns and send out a zero which turns off the one or leaves it low, then wait ~400 ns for the next bit. The serialization fits in nicely during the wait for the next bit.

Serialization of 32 channels is accomplished in the ~400ns of interbit time. Our 22 channel version has NOPs to make the timing work out. Adding more channels beyond 32 would cost about 3-1/2 cycles each.

The NeoPixel strips do NOT have to be all the same length. Once a strip is full, any additional bits are shifted off the end, even if they come from overrunning your data arrays. So set the length to the longest strip and you will be fine.



Code:
[FONT=Courier New]//  Fast parallel NeoPixel driver, generalized to 32 channels, timing set for a 256MHz Teensy 3.6.

//  Loren Carpenter, January 2020.

//  Runs at ~1 microsecond per bit, or ~1.3 million pixels per second.  You will need a NOP delay with fewer channels.

//  The bit assignment is somewhat arbitrary and is easily reconfigured.

//  NEO_INSERT_BIT(R,T,POS): copy bit 0 of T into bit POS of R, leaving every other bit of R
//  untouched.  On ARM this is a single BFI instruction; on any other target an equivalent C
//  expression is used so the transposition can be compiled and checked off-target.
//  POS must be a literal bit number because it is pasted into the instruction text.

#if defined(__arm__)
#define  NEO_INSERT_BIT(R,T,POS)   \
   __asm__ __volatile__ ( "bfi  %[d],%[s],#" #POS ",#1  \n\t" : [d] "+r" (R) : [s] "r" (T) )
#else
#define  NEO_INSERT_BIT(R,T,POS)   \
   ( (R) = ((R) & ~(1 << (POS))) | (((T) & 1) << (POS)) )
#endif

//  All TRANSPOSE_x macros share one contract:
//     R       int lvalue that receives the port word
//     S0..S7  the eight channel values feeding this port (S0 -> lowest pin)
//     M       which bit of each channel value to extract
//  Bit M of Sn ends up on the n-th pin used by the port.
//
//  Comments must stay outside the macro bodies: a backslash only continues a line when it
//  is the very last character, so "\  // comment" silently ends the macro.

//  Port B uses bits 0-3,16-19.  The final mask clears whatever was left of S0>>M in the
//  unused bit positions so unused pins are never set.

#define  TRANSPOSE_B(R,S0,S1,S2,S3,S4,S5,S6,S7,M)   \
   {  int  src_;                                    \
      (R)  = (S0) >> (M);                           \
      src_ = (S1) >> (M);  NEO_INSERT_BIT(R,src_,1);    \
      src_ = (S2) >> (M);  NEO_INSERT_BIT(R,src_,2);    \
      src_ = (S3) >> (M);  NEO_INSERT_BIT(R,src_,3);    \
      src_ = (S4) >> (M);  NEO_INSERT_BIT(R,src_,16);   \
      src_ = (S5) >> (M);  NEO_INSERT_BIT(R,src_,17);   \
      src_ = (S6) >> (M);  NEO_INSERT_BIT(R,src_,18);   \
      src_ = (S7) >> (M);  NEO_INSERT_BIT(R,src_,19);   \
      (R) &= 0xF000F;  }

//  Ports C and D use bits 0-7.  No mask is applied: bits 1-7 are all overwritten, so R stays
//  within bits 0-7 as long as S0 is an 8-bit value (0..255).

#define  TRANSPOSE_8(R,S0,S1,S2,S3,S4,S5,S6,S7,M)   \
   {  int  src_;                                    \
      (R)  = (S0) >> (M);                           \
      src_ = (S1) >> (M);  NEO_INSERT_BIT(R,src_,1);    \
      src_ = (S2) >> (M);  NEO_INSERT_BIT(R,src_,2);    \
      src_ = (S3) >> (M);  NEO_INSERT_BIT(R,src_,3);    \
      src_ = (S4) >> (M);  NEO_INSERT_BIT(R,src_,4);    \
      src_ = (S5) >> (M);  NEO_INSERT_BIT(R,src_,5);    \
      src_ = (S6) >> (M);  NEO_INSERT_BIT(R,src_,6);    \
      src_ = (S7) >> (M);  NEO_INSERT_BIT(R,src_,7);  }

#define  TRANSPOSE_C(R,S0,S1,S2,S3,S4,S5,S6,S7,M)   TRANSPOSE_8(R,S0,S1,S2,S3,S4,S5,S6,S7,M)

#define  TRANSPOSE_D(R,S0,S1,S2,S3,S4,S5,S6,S7,M)   TRANSPOSE_8(R,S0,S1,S2,S3,S4,S5,S6,S7,M)

//  Port E uses bits 0-5,10,11.  The final mask clears the unused bits 6-9 (and anything
//  above bit 11) that S0>>M may have left behind.

#define  TRANSPOSE_E(R,S0,S1,S2,S3,S4,S5,S6,S7,M)   \
   {  int  src_;                                    \
      (R)  = (S0) >> (M);                           \
      src_ = (S1) >> (M);  NEO_INSERT_BIT(R,src_,1);    \
      src_ = (S2) >> (M);  NEO_INSERT_BIT(R,src_,2);    \
      src_ = (S3) >> (M);  NEO_INSERT_BIT(R,src_,3);    \
      src_ = (S4) >> (M);  NEO_INSERT_BIT(R,src_,4);    \
      src_ = (S5) >> (M);  NEO_INSERT_BIT(R,src_,5);    \
      src_ = (S6) >> (M);  NEO_INSERT_BIT(R,src_,10);   \
      src_ = (S7) >> (M);  NEO_INSERT_BIT(R,src_,11);   \
      (R) &= 0xC3F; }

//  Single-instruction delay: a register-to-itself move (mov r0,r0) emitted as volatile
//  inline asm, which the compiler may not delete.
//  NOTE(review): presumably chosen over a plain "nop" so that each delay slot reliably
//  costs a cycle -- confirm against the core documentation.
#define NOP   __asm__ __volatile__ (" mov  r0,r0  \n\t" : : )

//  Fixed-length delays built by repeating NOP 10, 30 and 40 times.  Each expands to a
//  brace-enclosed block, so "NOP40;" is a block followed by an empty statement.
//  NOTE(review): at 256 MHz a cycle is ~3.9 ns, so NOP40 is ~156 ns if every NOP takes
//  exactly one cycle -- TODO confirm on hardware.
#define  NOP10 {NOP;NOP;NOP;NOP;NOP;NOP;NOP;NOP;NOP;NOP;}
#define  NOP30 {NOP10;NOP10;NOP10;}
#define  NOP40 {NOP10;NOP10;NOP10;NOP10;}

//  Pixels per channel buffer, and the resulting byte count (three color bytes per pixel).

#define  NLED  1000
#define  NRGB  (NLED*3)

//  One byte buffer per channel holding the colors to send, in NeoPixel GRB order.
//  The buffers are grouped eight per declaration, one group for each output port.

unsigned char  s1[NRGB],  s2[NRGB],  s3[NRGB],  s4[NRGB],
               s5[NRGB],  s6[NRGB],  s7[NRGB],  s8[NRGB];

unsigned char  s9[NRGB],  s10[NRGB], s11[NRGB], s12[NRGB],
               s13[NRGB], s14[NRGB], s15[NRGB], s16[NRGB];

unsigned char  s17[NRGB], s18[NRGB], s19[NRGB], s20[NRGB],
               s21[NRGB], s22[NRGB], s23[NRGB], s24[NRGB];

unsigned char  s25[NRGB], s26[NRGB], s27[NRGB], s28[NRGB],
               s29[NRGB], s30[NRGB], s31[NRGB], s32[NRGB];

//////////////////////////////////////////////////////////////////////////////////////////////////////

void sendAll() {

 int  b0, b1, b2, b3, b16, b17, b18, b19;
 int  c0, c1, c2, c3, c4,  c5,  c6,  c7;
 int  d0, d1, d2, d3, d4,  d5,  d6,  d7;
 int  e0, e1, e2, e3, e4,  e5,  e10, e11;
 int  tb, tc, td, te;
 int  i, j;

//  Make sure all used pins are configured for output.

 GPIOB_PDOR = 0;
 GPIOC_PDOR = 0;
 GPIOD_PDOR = 0;
 GPIOE_PDOR = 0;

 delayMicroseconds(30);

 for (i = 0; i < NRGB; i++)  {

   b0  = s1[i];
   b1  = s2[i];
   b2  = s3[i];
   b3  = s4[i];
   b16 = s5[i];
   b17 = s6[i];
   b17 = s7[i];
   b18 = s8[i];

   c0  = s9[i];
   c1  = s10[i];
   c2  = s11[i];
   c3  = s12[i];
   c4  = s13[i];
   c5  = s14[i];
   c6  = s15[i];
   c7  = s16[i];

   d0  = s17[i];
   d1  = s18[i];
   d2  = s19[i];
   d3  = s20[i];
   d4  = s21[i];
   d5  = s22[i];
   d6  = s23[i];
   d7  = s24[i];

   e0  = s25[i];
   e1  = s26[i];
   e2  = s27[i];
   e3  = s28[i];
   e4  = s29[i];
   e5  = s30[i];
   e10 = s31[i];
   e11 = s32[i];

   for (j = 0; j < 8; j++)  {

     TRANSPOSE_B (tb,b0,b1,b2,b3,b16,b17,b18,b19,j);
     TRANSPOSE_C (tc,c0,c1,c2,c3,c4,c5,c6,c7,j);
     TRANSPOSE_D (td,d0,d1,d2,d3,d4,d5,d6,d7,j);
     TRANSPOSE_E (te,e0,e1,e2,e3,e4,e5,e10,e11,j);

     GPIOB_PDOR = 0xF000F;
     GPIOC_PDOR = 0xFF;
     GPIOD_PDOR = 0xFF;
     GPIOE_PDOR = 0xC0F;

     NOP40;

     GPIOB_PDOR = tb;
     GPIOC_PDOR = tc;
     GPIOD_PDOR = td;
     GPIOE_PDOR = te;

     NOP40;  NOP40;

     GPIOB_PDOR = 0;
     GPIOC_PDOR = 0;
     GPIOD_PDOR = 0;
     GPIOE_PDOR = 0;
       
       // add delay here if you use fewer than 32 channels (~3-4 NOPs per channel removed)
   }
}[/FONT]
SplashDay2012.jpg

Twilight image of the previous golf cart version.

IMG_1941.jpg

Tricycle version in the shop.

IMG_1940.jpg

Control box under the seats.

IMG_1939.jpg

Control board with Teensy 3.6 and HCT245s, plus battery chargers and relays.

[video]http://www.cinematrix.com/BMan/2015/SplashNight.mov[/video]

Click to play.
 
Back
Top