Code:
#if defined(__ARM_ARCH_7EM__)
// Helpers for AudioEffectFreeverbSIMD::update(). Samples are carried in
// 32-bit registers; every helper that ends in SSAT returns a value that
// already fits in int16_t.

// Pack two 16-bit values into one word: `bottom` in bits 15:0, `top` in
// bits 31:16. Only the low halfword of each argument is used.
static inline uint32_t freeverb_pack16(int32_t bottom, int32_t top)
{
	uint32_t packed;
	asm volatile(
		"PKHBT %[PACK], %[BOT], %[TOP], LSL #16\n"
		: [PACK] "=r" (packed)
		: [BOT] "r" (bottom), [TOP] "r" (top)
	);
	return packed;
}

// (a * b) >> SHIFT, rounded toward zero, saturated to 16 bits.
// Negative products get (2^SHIFT - 1) added before the arithmetic shift so
// the result is rounded toward zero instead of toward minus infinity.
// MUL keeps only the low 32 bits of the product, so an out-of-range
// product wraps rather than saturates.
template <int SHIFT>
static inline int32_t freeverb_mul_shift_sat16(int32_t a, int32_t b)
{
	int32_t result;
	asm volatile(
		"MUL %[RESULT], %[A], %[B]\n"
		"CMN %[RESULT], #0\n"
		"IT MI\n"
		"ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
		"SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
		: [RESULT] "=&r" (result)
		: [A] "r" (a), [B] "r" (b),
		  [R_RND] "r" ((1 << SHIFT) - 1), [I_SHIFT] "i" (SHIFT)
		: "cc"	// CMN updates the condition flags
	);
	return result;
}

// Damping filter of one comb:
//   (filter * damp1 + delayed * damp2) >> 15, rounded toward zero, saturated.
// `damp_pair` must hold damp1 in its low halfword and damp2 in its high
// halfword (see freeverb_pack16).
static inline int32_t freeverb_comb_damp(int32_t delayed, int32_t filter, uint32_t damp_pair)
{
	const uint32_t state_pair = freeverb_pack16(filter, delayed);
	int32_t result;
	asm volatile(
		// SMUAD = dual 16x16 multiply, products added, no accumulator.
		// (SMLAD would add whatever the destination register held before;
		// "BIC Rd, Rd, #0" does not clear it.)
		"SMUAD %[RESULT], %[STATE], %[DAMP]\n"
		"CMN %[RESULT], #0\n"
		"IT MI\n"
		"ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
		"SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
		: [RESULT] "=&r" (result)
		: [STATE] "r" (state_pair), [DAMP] "r" (damp_pair),
		  [R_RND] "r" (0x7FFF), [I_SHIFT] "i" (15)
		: "cc"
	);
	return result;
}

// New comb delay-line sample:
//   sat16(input + sat16((filtered * feedback) >> 15)), rounding toward zero.
static inline int32_t freeverb_comb_feedback(int32_t filtered, int32_t feedback, int32_t input)
{
	int32_t result;
	asm volatile(
		"MUL %[RESULT], %[FIL], %[FB]\n"
		"CMN %[RESULT], #0\n"
		"IT MI\n"
		"ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
		"SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
		"ADD %[RESULT], %[RESULT], %[IN]\n"
		"SSAT %[RESULT], #16, %[RESULT]\n"
		: [RESULT] "=&r" (result)
		: [FIL] "r" (filtered), [FB] "r" (feedback), [IN] "r" (input),
		  [R_RND] "r" (0x7FFF), [I_SHIFT] "i" (15)
		: "cc"
	);
	return result;
}

// All-pass output: (bufout - in) >> 1, rounded toward zero, saturated.
static inline int32_t freeverb_allpass_out(int32_t bufout, int32_t in)
{
	int32_t result;
	asm volatile(
		"SUBS %[RESULT], %[BUF], %[OUT]\n"
		"IT MI\n"
		"ADDMI %[RESULT], %[RESULT], %[R_RND]\n"
		"SSAT %[RESULT], #16, %[RESULT], ASR %[I_SHIFT]\n"
		: [RESULT] "=&r" (result)
		: [BUF] "r" (bufout), [OUT] "r" (in),
		  [R_RND] "r" (0x1), [I_SHIFT] "i" (1)
		: "cc"	// SUBS updates the condition flags
	);
	return result;
}

// sat16(value * gain), no shift.
static inline int32_t freeverb_mul_sat16(int32_t value, int32_t gain)
{
	int32_t result;
	asm volatile(
		"MUL %[RESULT], %[OUT], %[CONST]\n"
		"SSAT %[RESULT], #16, %[RESULT]\n"
		: [RESULT] "=&r" (result)
		: [OUT] "r" (value), [CONST] "r" (gain)
	);
	return result;
}
#endif // __ARM_ARCH_7EM__

// Process one audio block through the reverb: eight parallel damped comb
// filters feeding four all-pass stages in series.
//
// On __ARM_ARCH_7EM__ targets the input block (or zeroblock when none is
// pending) is filtered into a newly allocated block which is transmitted
// and released. If no output block can be allocated, the pending input is
// received and released and nothing is transmitted. On KINETISL the input
// is simply received and released.
void AudioEffectFreeverbSIMD::update()
{
#if defined(__ARM_ARCH_7EM__)
	audio_block_t *outblock = allocate();
	if (!outblock) {
		// No output buffer: still consume the input so it is not left queued.
		audio_block_t *tmp = receiveReadOnly(0);
		if (tmp) release(tmp);
		return;
	}
	const audio_block_t *block = receiveReadOnly(0);
	if (!block) block = &zeroblock;

	// The damping pair and feedback gain are read once per block, so a
	// change made while a block is being processed takes effect from the
	// next block.
	const uint32_t damp_pair = freeverb_pack16(combdamp1, combdamp2);
	const int32_t feedback = combfeeback;

	for (int i = 0; i < AUDIO_BLOCK_SAMPLES; i++) {
		// TODO: scale numerical range depending on roomsize & damping
		const int32_t input = freeverb_mul_shift_sat16<17>(block->data[i], 8738);

		// Sum the comb outputs BEFORE the delay lines are overwritten below.
		// Eight 16-bit terms cannot overflow 32 bits, so plain addition gives
		// the same result as the saturating QADD chain.
		const int32_t sum =
			(int32_t)comb1buf[comb1index] + (int32_t)comb2buf[comb2index] +
			(int32_t)comb3buf[comb3index] + (int32_t)comb4buf[comb4index] +
			(int32_t)comb5buf[comb5index] + (int32_t)comb6buf[comb6index] +
			(int32_t)comb7buf[comb7index] + (int32_t)comb8buf[comb8index];

		// NOTE(review): sum * 31457 exceeds 32 bits once |sum| is above
		// roughly 68000; the multiply then wraps. Kept as in the original.
		int32_t output = freeverb_mul_shift_sat16<17>(sum, 31457);

		int32_t filtered;

		// Comb1
		filtered = freeverb_comb_damp(comb1buf[comb1index], comb1filter, damp_pair);
		comb1filter = filtered;
		comb1buf[comb1index] = freeverb_comb_feedback(filtered, feedback, input);

		// Comb2
		filtered = freeverb_comb_damp(comb2buf[comb2index], comb2filter, damp_pair);
		comb2filter = filtered;
		comb2buf[comb2index] = freeverb_comb_feedback(filtered, feedback, input);

		// Comb3
		filtered = freeverb_comb_damp(comb3buf[comb3index], comb3filter, damp_pair);
		comb3filter = filtered;
		comb3buf[comb3index] = freeverb_comb_feedback(filtered, feedback, input);

		// Comb4
		filtered = freeverb_comb_damp(comb4buf[comb4index], comb4filter, damp_pair);
		comb4filter = filtered;
		comb4buf[comb4index] = freeverb_comb_feedback(filtered, feedback, input);

		// Comb5
		filtered = freeverb_comb_damp(comb5buf[comb5index], comb5filter, damp_pair);
		comb5filter = filtered;
		comb5buf[comb5index] = freeverb_comb_feedback(filtered, feedback, input);

		// Comb6
		filtered = freeverb_comb_damp(comb6buf[comb6index], comb6filter, damp_pair);
		comb6filter = filtered;
		comb6buf[comb6index] = freeverb_comb_feedback(filtered, feedback, input);

		// Comb7
		filtered = freeverb_comb_damp(comb7buf[comb7index], comb7filter, damp_pair);
		comb7filter = filtered;
		comb7buf[comb7index] = freeverb_comb_feedback(filtered, feedback, input);

		// Comb8
		filtered = freeverb_comb_damp(comb8buf[comb8index], comb8filter, damp_pair);
		comb8filter = filtered;
		comb8buf[comb8index] = freeverb_comb_feedback(filtered, feedback, input);

		// Four all-pass stages in series. Each stores in + bufout/2 in its
		// delay line and outputs (bufout - in)/2.
		int32_t bufout;

		bufout = allpass1buf[allpass1index];
		allpass1buf[allpass1index] = output + (bufout >> 1);
		output = freeverb_allpass_out(bufout, output);

		bufout = allpass2buf[allpass2index];
		allpass2buf[allpass2index] = output + (bufout >> 1);
		output = freeverb_allpass_out(bufout, output);

		bufout = allpass3buf[allpass3index];
		allpass3buf[allpass3index] = output + (bufout >> 1);
		output = freeverb_allpass_out(bufout, output);

		bufout = allpass4buf[allpass4index];
		allpass4buf[allpass4index] = output + (bufout >> 1);
		output = freeverb_allpass_out(bufout, output);

		outblock->data[i] = freeverb_mul_sat16(output, 30);

		// Advance every circular delay-line index, wrapping at buffer length.
		if (++allpass1index >= sizeof(allpass1buf) / sizeof(int16_t)) allpass1index = 0;
		if (++allpass2index >= sizeof(allpass2buf) / sizeof(int16_t)) allpass2index = 0;
		if (++allpass3index >= sizeof(allpass3buf) / sizeof(int16_t)) allpass3index = 0;
		if (++allpass4index >= sizeof(allpass4buf) / sizeof(int16_t)) allpass4index = 0;
		if (++comb1index >= sizeof(comb1buf) / sizeof(int16_t)) comb1index = 0;
		if (++comb2index >= sizeof(comb2buf) / sizeof(int16_t)) comb2index = 0;
		if (++comb3index >= sizeof(comb3buf) / sizeof(int16_t)) comb3index = 0;
		if (++comb4index >= sizeof(comb4buf) / sizeof(int16_t)) comb4index = 0;
		if (++comb5index >= sizeof(comb5buf) / sizeof(int16_t)) comb5index = 0;
		if (++comb6index >= sizeof(comb6buf) / sizeof(int16_t)) comb6index = 0;
		if (++comb7index >= sizeof(comb7buf) / sizeof(int16_t)) comb7index = 0;
		if (++comb8index >= sizeof(comb8buf) / sizeof(int16_t)) comb8index = 0;
	}
	transmit(outblock);
	release(outblock);
	if (block != &zeroblock) release((audio_block_t *)block);
#elif defined(KINETISL)
	audio_block_t *block;
	block = receiveReadOnly(0);
	if (block) release(block);
#endif
}
Have I done something wrong?