Forum Rule: Always post complete source code & details to reproduce any issue!
Results 1 to 7 of 7

Thread: Freeverb - faster code base for Teensy 3.2 using SIMD

  1. #1
    Member
    Join Date
    Sep 2016
    Location
    Berlin, Germany
    Posts
    23

    Freeverb - faster code base for Teensy 3.2 using SIMD

    Hi all,

    I'm sharing this library modification in the hope that some may find it useful. While developing a little project of mine, I noticed that the freeverb effect was limited to the single-channel option when running on a Teensy 3.2. I needed two independent channels and I only had a Teensy 3.2 available, which was also running TFT code and a small GUI.

    Running the Teensy at 120Mhz I noted the following:
    The standard freeverb library was consuming ~48% of CPU time (per channel). If I ran two simultaneously then, as suggested by the documentation, the total CPU consumption was over 96%, resulting in my TFT having the performance characteristics of a tectonic plate migration.

    I resolved to take a closer look at the code base. It was quickly clear that there had been no attempt to optimise the code to make use of the SIMD capabilities of the M4. I began to make incremental additions using inline assembler, finally resulting in an almost complete replacement of the code. It looks messy with so many small snippets of asm, but in fact it allowed me to test each element to ensure the computational results of each section were 100% identical to the original code (for good or bad).

    The changes included using:
    - Packing instructions to align input data and coefficients.
    - Quad 16bit multiply with accumulation for state variable calculations.
    - Replacement of the C based saturation and round to zero routines with intrinsic M4 SSAT instructions and skipping of round to zero when no truncation was used.
    - Saturating additions to the combfilter summations.

    Final result was very helpful for my project. I reduced the CPU usage for each instantiation of freeverb to ~32%, thereby allowing me to operate 2 instances on the Teensy 3.2 and still having well over 30% CPU remaining.

    Hope someone else may find it helpful:

    Here is the code, this is just a single mono instance, if stereo is needed the same code must be duplicated and worked together.


    All the best
    Aidan



    Code:
    #if defined(__ARM_ARCH_7EM__)
    // ---------------------------------------------------------------------
    // Cortex-M4 DSP helpers used by AudioEffectFreeverb::update().
    // Each helper wraps one small inline-asm sequence so that every operand
    // constraint and clobber is stated exactly once.
    // ---------------------------------------------------------------------

    // 32-bit multiply that wraps on overflow, like the Cortex-M MUL
    // instruction (a plain signed multiply would be undefined on overflow).
    static inline int32_t freeverb_mul32(int32_t a, int32_t b)
    {
    	return (int32_t)((uint32_t)a * (uint32_t)b);
    }

    // Pack two 16-bit values into one word:
    // `bottom` goes to bits 0-15, `top` goes to bits 16-31.
    static inline uint32_t freeverb_pack16(int32_t bottom, int32_t top)
    {
    	uint32_t packed;
    	asm (
    		"PKHBT %[OUT], %[BOT], %[TOP], LSL #16\n"
    		: [OUT] "=r" (packed)
    		: [BOT] "r" (bottom), [TOP] "r" (top)
    	);
    	return packed;
    }

    // Saturate a 32-bit value to the signed 16-bit range.
    static inline int32_t freeverb_sat16(int32_t n)
    {
    	int32_t out;
    	asm (
    		"SSAT %[OUT], #16, %[IN]\n"
    		: [OUT] "=r" (out)
    		: [IN] "r" (n)
    	);
    	return out;
    }

    // Arithmetic shift right by RSHIFT, rounding toward zero, then saturate
    // to 16 bits.  Negative values get (1 << RSHIFT) - 1 added before the
    // shift so they round toward zero; positive values already do.
    template <int RSHIFT>
    static inline int32_t freeverb_round0_sat16(int32_t n)
    {
    	asm (
    		"CMP %[N], #0\n"
    		"IT MI\n"
    		"ADDMI %[N], %[N], %[RND]\n"
    		"SSAT %[N], #16, %[N], ASR %[SHIFT]\n"
    		: [N] "+r" (n)
    		: [RND] "r" ((1 << RSHIFT) - 1), [SHIFT] "i" (RSHIFT)
    		: "cc"	// CMP rewrites the condition flags
    	);
    	return n;
    }

    // Damping (low-pass) stage of one comb filter.
    // `damp` holds the filter-state coefficient in its low half-word and the
    // delay-line coefficient in its high half-word.  Returns
    // (filter * damp.low + delayed * damp.high) >> 15, rounded toward zero
    // and saturated to 16 bits.
    static inline int32_t freeverb_comb_damp(int32_t filter, int32_t delayed, uint32_t damp)
    {
    	int32_t acc;
    	uint32_t state = freeverb_pack16(filter, delayed);
    	// SMUAD: two 16x16 multiplies whose products are added.  It has no
    	// accumulator input, so nothing needs to be zeroed first.  (SMLAD
    	// would add whatever happens to be in the accumulator register.)
    	asm (
    		"SMUAD %[ACC], %[STATE], %[DAMP]\n"
    		: [ACC] "=r" (acc)
    		: [STATE] "r" (state), [DAMP] "r" (damp)
    	);
    	return freeverb_round0_sat16<15>(acc);
    }

    // Feedback stage of one comb filter: the new delay-line sample is
    // sat16(input + ((filter * feedback) >> 15)), the scaled term being
    // rounded toward zero and saturated before the addition.
    static inline int32_t freeverb_comb_feedback(int32_t filter, int32_t feedback, int32_t input)
    {
    	int32_t scaled = freeverb_round0_sat16<15>(freeverb_mul32(filter, feedback));
    	return freeverb_sat16(scaled + input);
    }
    #endif // __ARM_ARCH_7EM__

    // Process one audio block through the mono freeverb network:
    // eight comb filters read in parallel and summed, followed by four
    // allpass stages in series.  On KINETISL the input block is only
    // received and released; no output is produced.
    void AudioEffectFreeverb::update()
    {
    #if defined(__ARM_ARCH_7EM__)
    	const audio_block_t *block;
    	audio_block_t *outblock;
    	int i;
    	int16_t input, bufout, output;
    	int32_t sum;

    	outblock = allocate();
    	if (!outblock) {
    		// No output buffer available: still receive and release the
    		// pending input block, then give up for this update.
    		audio_block_t *tmp = receiveReadOnly(0);
    		if (tmp) release(tmp);
    		return;
    	}
    	block = receiveReadOnly(0);
    	if (!block) block = &zeroblock;

    	// The damping coefficients are not modified inside this function, so
    	// pack them once per block instead of once per comb filter per sample.
    	// NOTE(review): assumes combdamp1/combdamp2 cannot change while
    	// update() is running - confirm against the setters.
    	const uint32_t damp = freeverb_pack16(combdamp1, combdamp2);

    	for (i = 0; i < AUDIO_BLOCK_SAMPLES; i++) {

    		// TODO: scale numerical range depending on roomsize & damping
    		input = freeverb_round0_sat16<17>(freeverb_mul32(block->data[i], 8738));

    		// Sum the current output of all eight comb filters, read before
    		// any delay line is updated below.  Eight 16-bit values cannot
    		// overflow 32 bits, so an ordinary addition is sufficient.
    		sum = (int32_t)comb1buf[comb1index] + (int32_t)comb2buf[comb2index]
    		    + (int32_t)comb3buf[comb3index] + (int32_t)comb4buf[comb4index]
    		    + (int32_t)comb5buf[comb5index] + (int32_t)comb6buf[comb6index]
    		    + (int32_t)comb7buf[comb7index] + (int32_t)comb8buf[comb8index];

    		output = freeverb_round0_sat16<17>(freeverb_mul32(sum, 31457));

    		// Comb filters: update the damping state from the delayed sample,
    		// then write input + scaled feedback back into the delay line.
    		comb1filter = freeverb_comb_damp(comb1filter, comb1buf[comb1index], damp);
    		comb1buf[comb1index] = freeverb_comb_feedback(comb1filter, combfeeback, input);

    		comb2filter = freeverb_comb_damp(comb2filter, comb2buf[comb2index], damp);
    		comb2buf[comb2index] = freeverb_comb_feedback(comb2filter, combfeeback, input);

    		comb3filter = freeverb_comb_damp(comb3filter, comb3buf[comb3index], damp);
    		comb3buf[comb3index] = freeverb_comb_feedback(comb3filter, combfeeback, input);

    		comb4filter = freeverb_comb_damp(comb4filter, comb4buf[comb4index], damp);
    		comb4buf[comb4index] = freeverb_comb_feedback(comb4filter, combfeeback, input);

    		comb5filter = freeverb_comb_damp(comb5filter, comb5buf[comb5index], damp);
    		comb5buf[comb5index] = freeverb_comb_feedback(comb5filter, combfeeback, input);

    		comb6filter = freeverb_comb_damp(comb6filter, comb6buf[comb6index], damp);
    		comb6buf[comb6index] = freeverb_comb_feedback(comb6filter, combfeeback, input);

    		comb7filter = freeverb_comb_damp(comb7filter, comb7buf[comb7index], damp);
    		comb7buf[comb7index] = freeverb_comb_feedback(comb7filter, combfeeback, input);

    		comb8filter = freeverb_comb_damp(comb8filter, comb8buf[comb8index], damp);
    		comb8buf[comb8index] = freeverb_comb_feedback(comb8filter, combfeeback, input);

    		// Allpass stages in series.  Each one stores
    		// output + bufout/2 in its delay line and passes on
    		// (bufout - output) / 2, rounded toward zero and saturated.
    		bufout = allpass1buf[allpass1index];
    		allpass1buf[allpass1index] = output + (bufout >> 1);
    		output = freeverb_round0_sat16<1>((int32_t)bufout - (int32_t)output);

    		bufout = allpass2buf[allpass2index];
    		allpass2buf[allpass2index] = output + (bufout >> 1);
    		output = freeverb_round0_sat16<1>((int32_t)bufout - (int32_t)output);

    		bufout = allpass3buf[allpass3index];
    		allpass3buf[allpass3index] = output + (bufout >> 1);
    		output = freeverb_round0_sat16<1>((int32_t)bufout - (int32_t)output);

    		bufout = allpass4buf[allpass4index];
    		allpass4buf[allpass4index] = output + (bufout >> 1);
    		output = freeverb_round0_sat16<1>((int32_t)bufout - (int32_t)output);

    		// Final make-up gain of 30, saturated to 16 bits.
    		outblock->data[i] = freeverb_sat16(freeverb_mul32(output, 30));

    		// Advance every circular delay-line index, wrapping at the end.
    		if (++allpass1index >= sizeof(allpass1buf)/sizeof(int16_t)) allpass1index = 0;
    		if (++allpass2index >= sizeof(allpass2buf)/sizeof(int16_t)) allpass2index = 0;
    		if (++allpass3index >= sizeof(allpass3buf)/sizeof(int16_t)) allpass3index = 0;
    		if (++allpass4index >= sizeof(allpass4buf)/sizeof(int16_t)) allpass4index = 0;

    		if (++comb1index >= sizeof(comb1buf)/sizeof(int16_t)) comb1index = 0;
    		if (++comb2index >= sizeof(comb2buf)/sizeof(int16_t)) comb2index = 0;
    		if (++comb3index >= sizeof(comb3buf)/sizeof(int16_t)) comb3index = 0;
    		if (++comb4index >= sizeof(comb4buf)/sizeof(int16_t)) comb4index = 0;
    		if (++comb5index >= sizeof(comb5buf)/sizeof(int16_t)) comb5index = 0;
    		if (++comb6index >= sizeof(comb6buf)/sizeof(int16_t)) comb6index = 0;
    		if (++comb7index >= sizeof(comb7buf)/sizeof(int16_t)) comb7index = 0;
    		if (++comb8index >= sizeof(comb8buf)/sizeof(int16_t)) comb8index = 0;
    	}
    	transmit(outblock);
    	release(outblock);
    	// zeroblock is a stand-in for a missing input and must not be released.
    	if (block != &zeroblock) release((audio_block_t *)block);

    #elif defined(KINETISL)
    	// Effect not implemented on this target: just drop the input block.
    	audio_block_t *block;
    	block = receiveReadOnly(0);
    	if (block) release(block);
    #endif
    }
    Last edited by aidyw; 08-18-2021 at 01:45 PM.

  2. #2
    Senior Member
    Join Date
    Apr 2014
    Location
    Germany
    Posts
    9,428
    This looks pretty good. I hope Paul merges it

  3. #3
    Senior Member
    Join Date
    Dec 2018
    Posts
    169
    Hi Aidyw

    I tried your code for a synth project. It works ! This is great !!
    I was able to save CPU time and add new features.

    I tried to include your code as an independent library in my project, with no success. I added this line to my sketch :
    #include "effect_freeverb.h"
    Then I added these files to my root sketch folder:
    effect_freeverb.h. //original audio lib file
    effect_freeverb.ccp //optimized freeverb code

    It doesn't compile...
    Any idea to include this optimised freeverb as an independent library ?
    It would be nice if it were also added to the original audio lib.

    Emmanuel

  4. #4
    Member
    Join Date
    Sep 2016
    Location
    Berlin, Germany
    Posts
    23
    Quote Originally Posted by emmanuel63 View Post
    Any idea to include this optimised freeverb as an independent library ?
    l
    Hi, well I don't imagine that this is the best approach. In fact I am very much a newbie to the Teensy Arduino environment. I am a little frustrated as I was not, at the time I made these changes, aware of the fact that there are a bunch of intrinsic functions within the Audio library that cover much, if not all, of what I ended up manually coding (in terms of SIMD maths, packing, etc.). They are in the utils folder of the library.
    I'm quite sure I could have avoided all that discrete asm by using these native library functions, which are themselves in asm. There may as a result of this be a little more overhead on stack pops and pushes, but that's a compiler question and it probably won't make much difference.

    I think it would be better to get this code into shape along the same lines as the rest of the Audio library, and that means considering different hardware as well. I didn't really do this to focus on expanding the library as such, but I'm glad you found it helpful. However I would prefer some comments directly from Paul, as he makes any final decisions regarding what gets pulled in or not.

    So all the best, we see what happens.

  5. #5
    Member
    Join Date
    Sep 2016
    Location
    Berlin, Germany
    Posts
    23
    Quote Originally Posted by aidyw View Post
    Code:
    		asm volatile (
    			"QADD %[SUM], %[C1], %[C2]\n"
    			"QADD %[SUM], %[C3], %[C4]\n"
    			"QADD %[SUM], %[C5], %[C6]\n"
    			"QADD %[SUM], %[C7], %[C8]\n"
    			:[SUM]"=r"(sum)
    			:[C1]"r"((int32_t)comb1buf[comb1index]),[C2]"r"((int32_t)comb2buf[comb2index]),[C3]"r"((int32_t)comb3buf[comb3index]),[C4]"r"((int32_t)comb4buf[comb4index]),[C5]"r"((int32_t)comb5buf[comb5index]),[C6]"r"((int32_t)comb6buf[comb6index]),[C7]"r"((int32_t)comb7buf[comb7index]),[C8]"r"((int32_t)comb8buf[comb8index])
    			:
    		);
    
    }
    Woops, spotted a fundamental flaw, ....

    Code:
    // Sum the current outputs of the eight comb filters with saturating adds.
    // SUM is declared early-clobber ("=&r"): the first QADD writes it while
    // C3..C8 are still unread, so it must not share a register with any input.
    asm volatile (
    			"QADD %[SUM], %[C1], %[C2]\n"
    			"QADD %[SUM], %[C3], %[SUM]\n"
    			"QADD %[SUM], %[C4], %[SUM]\n"
    			"QADD %[SUM], %[C5], %[SUM]\n"
    			"QADD %[SUM], %[C6], %[SUM]\n"
    			"QADD %[SUM], %[C7], %[SUM]\n"
    			"QADD %[SUM], %[C8], %[SUM]\n"
    			:[SUM]"=&r"(sum)
    			:[C1]"r"((int32_t)comb1buf[comb1index]),[C2]"r"((int32_t)comb2buf[comb2index]),[C3]"r"((int32_t)comb3buf[comb3index]),[C4]"r"((int32_t)comb4buf[comb4index]),[C5]"r"((int32_t)comb5buf[comb5index]),[C6]"r"((int32_t)comb6buf[comb6index]),[C7]"r"((int32_t)comb7buf[comb7index]),[C8]"r"((int32_t)comb8buf[comb8index])
    			:
    		);
    Fixed.

  6. #6
    Senior Member
    Join Date
    Apr 2014
    Location
    Germany
    Posts
    9,428
    With the current policy of not merging things and dozens of open PRs, I doubt that it will reach the official code base, but: can you do a PR? You never know...
    I think it's pretty good...

  7. #7
    Senior Member
    Join Date
    May 2018
    Posts
    138
    Hi @aidyw,

    thanks for your optimizations for freeverb! I tried to use your code including your changes as a replacement for freeverb on a T_3.6. It seems to work but I am getting strange sound artifacts (listen here).

    This is my (fixed) update method:

    Code:
    void AudioEffectFreeverbSIMD::update()
    {
    #if defined(__ARM_ARCH_7EM__)
      const audio_block_t *block;
      audio_block_t *outblock;
      int i;
      int16_t input, bufout, output;
      int32_t sum;
    
      uint32_t PackA, PackB;
    
    
      outblock = allocate();
      if (!outblock) {
        audio_block_t *tmp = receiveReadOnly(0);
        if (tmp) release(tmp);
        return;
      }
      block = receiveReadOnly(0);
      if (!block) block = &zeroblock;
    
    
      for (i = 0; i < AUDIO_BLOCK_SAMPLES; i++) {
    
        // TODO: scale numerical range depending on roomsize & damping
    
        asm volatile(
          "MUL %[RESULT], %[BLOCK], %[CONST]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(input)
          :[BLOCK]"r"((int32_t)block->data[i]), [CONST]"r"(8738), [R_RND]"r"(0x1FFFF), [I_SHIFT]"i"(17)
          :
        );
    
        sum = 0;
    
        asm volatile (
          "QADD %[SUM], %[C1], %[C2]\n"
          "QADD %[SUM], %[C3], %[SUM]\n"
          "QADD %[SUM], %[C4], %[SUM]\n"
          "QADD %[SUM], %[C5], %[SUM]\n"
          "QADD %[SUM], %[C6], %[SUM]\n"
          "QADD %[SUM], %[C7], %[SUM]\n"
          "QADD %[SUM], %[C8], %[SUM]\n"
          :[SUM]"=r"(sum)
          :[C1]"r"((int32_t)comb1buf[comb1index]), [C2]"r"((int32_t)comb2buf[comb2index]), [C3]"r"((int32_t)comb3buf[comb3index]), [C4]"r"((int32_t)comb4buf[comb4index]), [C5]"r"((int32_t)comb5buf[comb5index]), [C6]"r"((int32_t)comb6buf[comb6index]), [C7]"r"((int32_t)comb7buf[comb7index]), [C8]"r"((int32_t)comb8buf[comb8index])
          :
        );
    
        //output = asm_sat16((sum * 31457), 0x1FFFF, 17);
    
        asm volatile (
          "MUL %[RESULT], %[SUM], %[CONST]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(output)
          :[SUM]"r"((int32_t)sum), [CONST]"r"(31457), [R_RND]"r"(0x1FFFF), [I_SHIFT]"i"(17)
          :
        );
    
        // Comb1
        asm volatile (
          "PKHBT %[PACKA], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKA]"=&r"(PackA)
          :[INTOP]"r"(comb1buf[comb1index]), [INBOT]"r"(comb1filter)
          :
        );
        asm volatile (
          "PKHBT %[PACKB], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKB]"=&r"(PackB)
          :[INTOP]"r"(combdamp2), [INBOT]"r"(combdamp1)
          :
        );
        asm volatile(
          "BIC %[RESULT], %[RESULT], #0\n"
          "SMLAD %[RESULT], %[PACKA], %[PACKB], %[RESULT]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(comb1filter)
          :[PACKA]"r"(PackA), [PACKB]"r"(PackB), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :"memory"
        );
        asm volatile (
          "MUL %[RESULT], %[FIL], %[FB]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          "ADD %[RESULT], %[RESULT], %[IN]\n"
          "SSAT %[RESULT], #16, %[RESULT]\n"
          :[RESULT]"=&r"(comb1buf[comb1index])
          :[FIL]"r"(comb1filter), [IN]"r"((int32_t)input), [FB]"r"((int32_t)combfeeback), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :
        );
    
        // Comb2
        asm volatile (
          "PKHBT %[PACKA], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKA]"=&r"(PackA)
          :[INTOP]"r"(comb2buf[comb2index]), [INBOT]"r"(comb2filter)
          :
        );
        asm volatile (
          "PKHBT %[PACKB], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKB]"=&r"(PackB)
          :[INTOP]"r"(combdamp2), [INBOT]"r"(combdamp1)
          :
        );
        asm volatile(
          "BIC %[RESULT], %[RESULT], #0\n"
          "SMLAD %[RESULT], %[PACKA], %[PACKB], %[RESULT]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(comb2filter)
          :[PACKA]"r"(PackA), [PACKB]"r"(PackB), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :"memory"
        );
        asm volatile (
          "MUL %[RESULT], %[FIL], %[FB]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          "ADD %[RESULT], %[RESULT], %[IN]\n"
          "SSAT %[RESULT], #16, %[RESULT]\n"
          :[RESULT]"=&r"(comb2buf[comb2index])
          :[FIL]"r"(comb2filter), [IN]"r"((int32_t)input), [FB]"r"((int32_t)combfeeback), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :
        );
    
    
        // Comb3
    
        asm volatile (
          "PKHBT %[PACKA], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKA]"=&r"(PackA)
          :[INTOP]"r"(comb3buf[comb3index]), [INBOT]"r"(comb3filter)
          :
        );
        asm volatile (
          "PKHBT %[PACKB], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKB]"=&r"(PackB)
          :[INTOP]"r"(combdamp2), [INBOT]"r"(combdamp1)
          :
        );
        asm volatile(
          "BIC %[RESULT], %[RESULT], #0\n"
          "SMLAD %[RESULT], %[PACKA], %[PACKB], %[RESULT]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(comb3filter)
          :[PACKA]"r"(PackA), [PACKB]"r"(PackB), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :"memory"
        );
        asm volatile (
          "MUL %[RESULT], %[FIL], %[FB]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          "ADD %[RESULT], %[RESULT], %[IN]\n"
          "SSAT %[RESULT], #16, %[RESULT]\n"
          :[RESULT]"=&r"(comb3buf[comb3index])
          :[FIL]"r"(comb3filter), [IN]"r"((int32_t)input), [FB]"r"((int32_t)combfeeback), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :
        );
    
        // Comb4
        asm volatile (
          "PKHBT %[PACKA], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKA]"=&r"(PackA)
          :[INTOP]"r"(comb4buf[comb4index]), [INBOT]"r"(comb4filter)
          :
        );
        asm volatile (
          "PKHBT %[PACKB], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKB]"=&r"(PackB)
          :[INTOP]"r"(combdamp2), [INBOT]"r"(combdamp1)
          :
        );
        asm volatile(
          "BIC %[RESULT], %[RESULT], #0\n"
          "SMLAD %[RESULT], %[PACKA], %[PACKB], %[RESULT]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(comb4filter)
          :[PACKA]"r"(PackA), [PACKB]"r"(PackB), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :"memory"
        );
        asm volatile (
          "MUL %[RESULT], %[FIL], %[FB]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          "ADD %[RESULT], %[RESULT], %[IN]\n"
          "SSAT %[RESULT], #16, %[RESULT]\n"
          :[RESULT]"=&r"(comb4buf[comb4index])
          :[FIL]"r"(comb4filter), [IN]"r"((int32_t)input), [FB]"r"((int32_t)combfeeback), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :
        );
    
    
    
        // Comb5
        asm volatile (
          "PKHBT %[PACKA], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKA]"=&r"(PackA)
          :[INTOP]"r"(comb5buf[comb5index]), [INBOT]"r"(comb5filter)
          :
        );
        asm volatile (
          "PKHBT %[PACKB], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKB]"=&r"(PackB)
          :[INTOP]"r"(combdamp2), [INBOT]"r"(combdamp1)
          :
        );
        asm volatile(
          "BIC %[RESULT], %[RESULT], #0\n"
          "SMLAD %[RESULT], %[PACKA], %[PACKB], %[RESULT]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(comb5filter)
          :[PACKA]"r"(PackA), [PACKB]"r"(PackB), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :"memory"
        );
        asm volatile (
          "MUL %[RESULT], %[FIL], %[FB]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          "ADD %[RESULT], %[RESULT], %[IN]\n"
          "SSAT %[RESULT], #16, %[RESULT]\n"
          :[RESULT]"=&r"(comb5buf[comb5index])
          :[FIL]"r"(comb5filter), [IN]"r"((int32_t)input), [FB]"r"((int32_t)combfeeback), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :
        );
    
    
        // Comb6
        asm volatile (
          "PKHBT %[PACKA], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKA]"=&r"(PackA)
          :[INTOP]"r"(comb6buf[comb6index]), [INBOT]"r"(comb6filter)
          :
        );
        asm volatile (
          "PKHBT %[PACKB], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKB]"=&r"(PackB)
          :[INTOP]"r"(combdamp2), [INBOT]"r"(combdamp1)
          :
        );
        asm volatile(
          "BIC %[RESULT], %[RESULT], #0\n"
          "SMLAD %[RESULT], %[PACKA], %[PACKB], %[RESULT]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(comb6filter)
          :[PACKA]"r"(PackA), [PACKB]"r"(PackB), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :"memory"
        );
        asm volatile (
          "MUL %[RESULT], %[FIL], %[FB]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          "ADD %[RESULT], %[RESULT], %[IN]\n"
          "SSAT %[RESULT], #16, %[RESULT]\n"
          :[RESULT]"=&r"(comb6buf[comb6index])
          :[FIL]"r"(comb6filter), [IN]"r"((int32_t)input), [FB]"r"((int32_t)combfeeback), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :
        );
    
        // Comb7
        asm volatile (
          "PKHBT %[PACKA], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKA]"=&r"(PackA)
          :[INTOP]"r"(comb7buf[comb7index]), [INBOT]"r"(comb7filter)
          :
        );
        asm volatile (
          "PKHBT %[PACKB], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKB]"=&r"(PackB)
          :[INTOP]"r"(combdamp2), [INBOT]"r"(combdamp1)
          :
        );
        asm volatile(
          "BIC %[RESULT], %[RESULT], #0\n"
          "SMLAD %[RESULT], %[PACKA], %[PACKB], %[RESULT]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(comb7filter)
          :[PACKA]"r"(PackA), [PACKB]"r"(PackB), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :"memory"
        );
        asm volatile (
          "MUL %[RESULT], %[FIL], %[FB]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          "ADD %[RESULT], %[RESULT], %[IN]\n"
          "SSAT %[RESULT], #16, %[RESULT]\n"
          :[RESULT]"=&r"(comb7buf[comb7index])
          :[FIL]"r"(comb7filter), [IN]"r"((int32_t)input), [FB]"r"((int32_t)combfeeback), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :
        );
    
    
        // Comb8
        asm volatile (
          "PKHBT %[PACKA], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKA]"=&r"(PackA)
          :[INTOP]"r"(comb8buf[comb8index]), [INBOT]"r"(comb8filter)
          :
        );
        asm volatile (
          "PKHBT %[PACKB], %[INBOT], %[INTOP], LSL #16\n"
          :[PACKB]"=&r"(PackB)
          :[INTOP]"r"(combdamp2), [INBOT]"r"(combdamp1)
          :
        );
        asm volatile(
          "BIC %[RESULT], %[RESULT], #0\n"
          "SMLAD %[RESULT], %[PACKA], %[PACKB], %[RESULT]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(comb8filter)
          :[PACKA]"r"(PackA), [PACKB]"r"(PackB), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :"memory"
        );
    
    
        asm volatile(
          "MUL %[RESULT], %[FIL], %[FB]\n"
          "CMN %[RESULT], #0\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          "ADD %[RESULT], %[RESULT], %[IN]\n"
          "SSAT %[RESULT], #16, %[RESULT]\n"
          :[RESULT]"=&r"(comb8buf[comb8index])
          :[FIL]"r"(comb8filter), [IN]"r"((int32_t)input), [FB]"r"((int32_t)combfeeback), [R_RND]"r"(0x7FFF), [I_SHIFT]"i"(15)
          :
        );
    
    
        bufout = allpass1buf[allpass1index];
        allpass1buf[allpass1index] = output + (bufout >> 1);
        asm volatile(
          "SUBS %[RESULT], %[BUF], %[OUT]\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(output)
          :[BUF]"r"((int32_t)bufout), [OUT]"r"((int32_t)output), [R_RND]"r"(0x1), [I_SHIFT]"i"(1)
          :
        );
    
    
        bufout = allpass2buf[allpass2index];
        allpass2buf[allpass2index] = output + (bufout >> 1);
        asm volatile(
          "SUBS %[RESULT], %[BUF], %[OUT]\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(output)
          :[BUF]"r"((int32_t)bufout), [OUT]"r"((int32_t)output), [R_RND]"r"(0x1), [I_SHIFT]"i"(1)
          :
        );
    
    
        bufout = allpass3buf[allpass3index];
        allpass3buf[allpass3index] = output + (bufout >> 1);
        asm volatile(
          "SUBS %[RESULT], %[BUF], %[OUT]\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(output)
          :[BUF]"r"((int32_t)bufout), [OUT]"r"((int32_t)output), [R_RND]"r"(0x1), [I_SHIFT]"i"(1)
          :
        );
    
    
        bufout = allpass4buf[allpass4index];
        allpass4buf[allpass4index] = output + (bufout >> 1);
        asm volatile(
          "SUBS %[RESULT], %[BUF], %[OUT]\n"
          "IT MI\n"
          "ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
          "SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
          :[RESULT]"=&r"(output)
          :[BUF]"r"((int32_t)bufout), [OUT]"r"((int32_t)output), [R_RND]"r"(0x1), [I_SHIFT]"i"(1)
          :
        );
    
    
        asm volatile(
          "MUL %[RESULT], %[OUT], %[CONST]\n"
          "SSAT %[RESULT], #16, %[RESULT]\n"
          :[RESULT]"=&r"(outblock->data[i])
          :[OUT]"r"((int32_t)output), [CONST]"r"(30)
          :
        );
    
    
        if (++allpass1index >= sizeof(allpass1buf) / sizeof(int16_t)) allpass1index = 0;
        if (++allpass2index >= sizeof(allpass2buf) / sizeof(int16_t)) allpass2index = 0;
        if (++allpass3index >= sizeof(allpass3buf) / sizeof(int16_t)) allpass3index = 0;
        if (++allpass4index >= sizeof(allpass4buf) / sizeof(int16_t)) allpass4index = 0;
    
        if (++comb1index >= sizeof(comb1buf) / sizeof(int16_t)) comb1index = 0;
        if (++comb2index >= sizeof(comb2buf) / sizeof(int16_t)) comb2index = 0;
        if (++comb3index >= sizeof(comb3buf) / sizeof(int16_t)) comb3index = 0;
        if (++comb4index >= sizeof(comb4buf) / sizeof(int16_t)) comb4index = 0;
        if (++comb5index >= sizeof(comb5buf) / sizeof(int16_t)) comb5index = 0;
        if (++comb6index >= sizeof(comb6buf) / sizeof(int16_t)) comb6index = 0;
        if (++comb7index >= sizeof(comb7buf) / sizeof(int16_t)) comb7index = 0;
        if (++comb8index >= sizeof(comb8buf) / sizeof(int16_t)) comb8index = 0;
      }
      transmit(outblock);
      release(outblock);
      if (block != &zeroblock) release((audio_block_t *)block);
    
    #elif defined(KINETISL)
      audio_block_t *block;
      block = receiveReadOnly(0);
      if (block) release(block);
    #endif
    }
    Have I done something wrong?

    TIA, Holger

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •