I haven't seen any discussion on this forum on using CMSIS Version 5.3. I wanted to try utilizing the floating point FFTs in a signal filtering application. I'm familiar with the work that has been discussed on the Software Defined Radio and the Guitar Cabinet Simulation blogs using CMSIS Version 4, along with some of the reservations in integrating into the Teensy libraries due to overhead concerns. I was successful in integrating that library into my environment and was able to design a simple overlap-and-save filtering algorithm using a 200 point impulse response filter. I tried it out using 512 and 1024 point transforms. I was impressed with the floating point performance. For a forward FFT (using arm_rfft_fast_f32), complex multiply and inverse FFT using 512 points it took about 500 microseconds. For a 1024 point FFT it took approximately 900 microseconds. With regard to the program storage space for my demo program, here is the result of the compilation:

Sketch uses 106892 bytes (10%) of program storage space. Maximum is 1048576 bytes.

Global variables use 83612 bytes (31%) of dynamic memory, leaving 178532 bytes for local variables. Maximum is 262144 bytes.

I'm currently working on a realtime version of this filter and have the sampling rate running up to 3 microseconds/sample. The data are digitized using the onboard ADC, filtered, with the filtered results being sent through the DAC. The phase delay is running about 7.8 milliseconds using a 512 point FFT filter.

Here is the code I used to evaluate the performance described above.

/*

Filter a signal containing two frequencies using the overlap-save method.

Uses CMSIS Version 5.3.0 f32 routines on a Teensy 3.6

*/

// Common libraries:

#include <arm_math.h>

#include <arm_const_structs.h>

//#include <ref.h>

#define bufSize 8192

#define numpts 512

#define impulse_length_minus_1 199

static arm_status status;

static arm_rfft_fast_instance_f32 fft_instance;

boolean out_pin; // Use for timing

float32_t input_waveform[bufSize];

float32_t output_waveform[bufSize];

float32_t fft_aray[numpts]; // Array used to store fft result

float32_t window[numpts]; // Array used to store fft of filter data

float32_t cmplx_product[numpts]; // Array used to store complex product of fft result and window

float32_t Ak[numpts]; // Used to store data being sent to the fft routines

float32_t fft_result[numpts]; // Output from inverse fft

float32_t filter[numpts]; // Will contain the impulse that is terminated with zero padding

float32_t impulse[impulse_length_minus_1 + 1] = { // Impulse response for low-pass filter beginning at frequency bin 20 out of 128. Stop band > -100 dB

9.4978700E-08, -1.0099240E-06, -3.9555098E-06, -8.1674212E-06, -1.2233856E-05, -1.4147944E-05, -1.1836013E-05, -3.8747080E-06,

9.7844092E-06, 2.7311901E-05, 4.4937360E-05, 5.7465704E-05, 5.9374463E-05, 4.6323969E-05, 1.6752608E-05, -2.6873229E-05,

-7.7589710E-05, -1.2456752E-04, -1.5493964E-04, -1.5661931E-04, -1.2159463E-04, -4.8950325E-05, 5.3220769E-05, 1.6774265E-04,

2.7052492E-04, 3.3489880E-04, 3.3748708E-04, 2.6447337E-04, 1.1682254E-04, -8.7008106E-05, -3.1198579E-04, -5.1149393E-04,

-6.3594928E-04, -6.4365675E-04, -5.1175053E-04, -2.4469101E-04, 1.2205630E-04, 5.2479704E-04, 8.8124334E-04, 1.1058973E-03,

1.1285844E-03, 9.1242952E-04, 4.6714264E-04, -1.4606171E-04, -8.2023298E-04, -1.4193952E-03, -1.8040441E-03, -1.8610211E-03,

-1.5317851E-03, -8.3265360E-04, 1.3849318E-04, 1.2123019E-03, 2.1748259E-03, 2.8075305E-03, 2.9334421E-03, 2.4602629E-03,

1.4107964E-03, -6.7401987E-05, -1.7177495E-03, -3.2154444E-03, -4.2276791E-03, -4.4827877E-03, -3.8347517E-03, -2.3089211E-03,

-1.1716140E-04, 2.3642611E-03, 4.6536868E-03, 6.2518534E-03, 6.7434843E-03, 5.8931952E-03, 3.7152958E-03, 5.0023185E-04,

-3.2125723E-03, -6.7158793E-03, -9.2593249E-03, -1.0197256E-02, -9.1325789E-03, -6.0274401E-03, -1.2551129E-03, 4.4236323E-03,

9.9633829E-03, 1.4204956E-02, 1.6088985E-02, 1.4876395E-02, 1.0335310E-02, 2.8564602E-03, -6.5310283E-03, -1.6255904E-02,

-2.4406433E-02, -2.9011491E-02, -2.8359337E-02, -2.1302867E-02, -7.4992025E-03, 1.2460427E-02, 3.7058422E-02, 6.4002438E-02,

9.0503604E-02, 1.1364256E-01, 1.3076897E-01, 1.3987339E-01, 1.3987339E-01, 1.3076897E-01, 1.1364256E-01, 9.0503604E-02,

6.4002438E-02, 3.7058422E-02, 1.2460427E-02, -7.4992025E-03, -2.1302867E-02, -2.8359337E-02, -2.9011491E-02, -2.4406433E-02,

-1.6255904E-02, -6.5310283E-03, 2.8564602E-03, 1.0335310E-02, 1.4876395E-02, 1.6088985E-02, 1.4204956E-02, 9.9633829E-03,

4.4236323E-03, -1.2551129E-03, -6.0274401E-03, -9.1325789E-03, -1.0197256E-02, -9.2593249E-03, -6.7158793E-03, -3.2125723E-03,

5.0023185E-04, 3.7152958E-03, 5.8931952E-03, 6.7434843E-03, 6.2518534E-03, 4.6536868E-03, 2.3642611E-03, -1.1716140E-04,

-2.3089211E-03, -3.8347517E-03, -4.4827877E-03, -4.2276791E-03, -3.2154444E-03, -1.7177495E-03, -6.7401987E-05, 1.4107964E-03,

2.4602629E-03, 2.9334421E-03, 2.8075305E-03, 2.1748259E-03, 1.2123019E-03, 1.3849318E-04, -8.3265360E-04, -1.5317851E-03,

-1.8610211E-03, -1.8040441E-03, -1.4193952E-03, -8.2023298E-04, -1.4606171E-04, 4.6714264E-04, 9.1242952E-04, 1.1285844E-03,

1.1058973E-03, 8.8124334E-04, 5.2479704E-04, 1.2205630E-04, -2.4469101E-04, -5.1175053E-04, -6.4365675E-04, -6.3594928E-04,

-5.1149393E-04, -3.1198579E-04, -8.7008106E-05, 1.1682254E-04, 2.6447337E-04, 3.3748708E-04, 3.3489880E-04, 2.7052492E-04,

1.6774265E-04, 5.3220769E-05, -4.8950325E-05, -1.2159463E-04, -1.5661931E-04, -1.5493964E-04, -1.2456752E-04, -7.7589710E-05,

-2.6873229E-05, 1.6752608E-05, 4.6323969E-05, 5.9374463E-05, 5.7465704E-05, 4.4937360E-05, 2.7311901E-05, 9.7844092E-06,

-3.8747080E-06, -1.1836013E-05, -1.4147944E-05, -1.2233856E-05, -8.1674212E-06, -3.9555098E-06, -1.0099240E-06, 9.4978700E-08

};

float pi = 3.1415926535897932f;

float freq1; // Want to keep freq1

float freq2; // Want to filter out freq2

void setup()

{

Serial.begin (230400);

// Set up output ports for timing purposes

PORTC_PCR4 = ( unsigned long ) 0x00000101; // Section 12.5.1, pg. 221 - 222, see Pin Mux Control

// Bit 0 -> Pull Select -> Internal pullup resistor is enabled

// Bit 8 -> ALT1 -> GPIO

PORTC_PCR5 = ( unsigned long ) 0x00000101; // Section 12.5.1, pg. 221 - 222, see Pin Mux Control

// Bit 0 -> Pull Select -> Internal pullup resistor is enabled

// Bit 8 -> ALT1 -> GPIO

GPIOC_PDDR = ( unsigned long ) 0x00000020; // Configure pin 13 as output. See Section 63.3.6. Pin 13 is PTC5 (on-board LED)

// Build waveform for processing

freq1 = 5. * (float)numpts/(float)(impulse_length_minus_1 + 1); //Want to keep freq1

freq2 = 4. * freq1; //Want to filter out freq2. For a 256 point FFT the 3 dB cutoff frequency is at about frequency bin #19

for (int i=0; i < (bufSize); i ++) // Calculate signal to filter over bufSize points

{

input_waveform[i] = sin(2. * pi * freq1 * (float)i/(float)numpts) + sin(2. * pi * freq2 * (float)i/(float)numpts);

}

// Build impulse filter array. Need to zero pad from the end of the impulse to numpts (end of the array).

arm_fill_f32(0.0, filter, numpts);

for (int i = 0; i < impulse_length_minus_1 + 1; i++) filter[i] = impulse[i];

status = arm_rfft_fast_init_f32(&fft_instance, numpts); // Initialize the fft routines

arm_rfft_fast_f32(&fft_instance, filter, window, 0); // Calculate the spectral response of the filter.

GPIOC_PSOR |= (1 << 5); // Section 63.3.2 - Set pin 13 HIGH

out_pin = true;

delay(5000); // Give plenty of time to trigger scope and to select either the Serial Monitor or Serial Plotter

}

void loop()

{

arm_fill_f32(0.0, output_waveform, bufSize);

arm_fill_f32(0.0, input_waveform, impulse_length_minus_1); //Need to zero pad the beginning of the signal so the circular convolution matches a linear convolution.

// This loop for processing input_waveform data completes in about 500 microseconds per pass for numpts = 512

// 900 microseconds per pass for numpts = 1024

for (int i = 0; i < bufSize/(numpts - impulse_length_minus_1); i++) { // Use for overlap to avoid circular convolution

if (out_pin) GPIOC_PCOR |= (1 << 5); // Section 63.3.1 - Set pin 13 LOW -- Use for scope timing

else

GPIOC_PSOR |= (1 << 5); // Section 63.3.2 - Set pin 13 HIGH

out_pin = !out_pin;

arm_copy_f32(input_waveform + i * (numpts - impulse_length_minus_1), Ak, numpts);

arm_rfft_fast_f32(&fft_instance, Ak, fft_aray, 0);

arm_cmplx_mult_cmplx_f32(fft_aray, window, cmplx_product, numpts/2); // Multiplication in frequency domain is a convolution in the time domain

arm_rfft_fast_f32(&fft_instance, cmplx_product, fft_result, 1); // Inverse transform

arm_copy_f32(fft_result + impulse_length_minus_1, output_waveform + i * (numpts - impulse_length_minus_1), numpts - impulse_length_minus_1);

}

for (int i = 0; i < 1000; i++) Serial.println(output_waveform[i]); //Use to drive the serial plotter to display the second set of 500 points

while(1) {}

}