maa@vims.edu
New member
Sometimes assembly coding is necessary to get clean and precise control of the Teensy 3.1. Would you please introduce the basics that can be used on the Teensy 3.1? Thanks and best.
static inline void delayMicroseconds(uint32_t) __attribute__((always_inline, unused));
static inline void delayMicroseconds(uint32_t usec)
{
#if F_CPU == 96000000
uint32_t n = usec << 5;
#elif F_CPU == 48000000
uint32_t n = usec << 4;
#elif F_CPU == 24000000
uint32_t n = usec << 3;
#endif
if (usec == 0) return;
asm volatile(
"L_%=_delayMicroseconds:" "\n\t"
"subs %0, #1" "\n\t"
"bne L_%=_delayMicroseconds" "\n"
: "+r" (n) :
);
}
// computes (sum + ((a[31:0] * b[15:0]) >> 16))
static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
{
int32_t out;
asm volatile("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
return out;
}
/*
 * computes (sum + ((a[31:0] * b[31:16]) >> 16))
 *
 * sum: accumulator value the scaled product is added to.
 * a:   signed 32-bit multiplicand.
 * b:   packed word; only its top halfword is used, read as a signed 16-bit
 *      value. The bottom halfword is ignored.
 * Returns the accumulated result. The addition wraps around on overflow;
 * it does not saturate.
 *
 * On ARM cores with the DSP extension this is the single `smlawt`
 * instruction. Elsewhere an equivalent C expression is used, which relies on
 * `>>` of a negative value being an arithmetic shift (true for GCC and
 * Clang).
 */
static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
    return out;
#else
    /* Sign-extend the top halfword of b. */
    int16_t half = (int16_t)(uint16_t)(b >> 16);
    /* 32x16 product is at most 48 bits; keep its top 32 bits. */
    int32_t scaled = (int32_t)(((int64_t)a * half) >> 16);
    /* Add in unsigned arithmetic so overflow wraps instead of being UB. */
    return (int32_t)((uint32_t)sum + (uint32_t)scaled);
#endif
}
do {
a0 = *state++;
a1 = *state++;
a2 = *state++;
b1 = *state++;
b2 = *state++;
aprev = *state++;
bprev = *state++;
sum = *state & 0x3FFF;
data = end - AUDIO_BLOCK_SAMPLES/2;
do {
in2 = *data;
sum = signed_multiply_accumulate_32x16b(sum, a0, in2);
sum = signed_multiply_accumulate_32x16t(sum, a1, aprev);
sum = signed_multiply_accumulate_32x16b(sum, a2, aprev);
sum = signed_multiply_accumulate_32x16t(sum, b1, bprev);
sum = signed_multiply_accumulate_32x16b(sum, b2, bprev);
out2 = (uint32_t)sum >> 14;
sum &= 0x3FFF;
sum = signed_multiply_accumulate_32x16t(sum, a0, in2);
sum = signed_multiply_accumulate_32x16b(sum, a1, in2);
sum = signed_multiply_accumulate_32x16t(sum, a2, aprev);
sum = signed_multiply_accumulate_32x16b(sum, b1, out2);
sum = signed_multiply_accumulate_32x16t(sum, b2, bprev);
aprev = in2;
bprev = pack_16x16(sum >> 14, out2);