By using an N-point complex FFT you can calculate the FFTs of two real-valued signals at once. There is a little overhead in setting up the input buffer for the complex FFT and in recovering the two outputs. I'm sure the combine and recover stages can be optimized, but I'll leave that for later; the performance increase is already fairly large. The sketch below shows how I did it. I used the "complex q15" data type in this example.
You will see that for the q15 data type there is some round-off error that needs to be worked out, but the 2-for-1 FFT outputs should be theoretically exact.
Code:
#include "arm_math.h"
#include <complex.h>
// Number of complex points per FFT; also the length of every buffer below.
#define FFT_SIZE 1024
// FFT instance: initialised once in setup() and reused for every transform.
arm_cfft_radix4_instance_q15 fft_inst;
// q15 complex data type (GCC __complex__ extension: one q15_t real part and
// one q15_t imaginary part, accessed with __real__ / __imag__)
typedef q15_t __complex__ cplx;
// buffers for the FFTs
// Reference path: each real signal gets its own complex FFT (imaginary parts
// are zeroed in setup()); both are transformed in place.
cplx Standard_FFT_Buffer1[FFT_SIZE];
cplx Standard_FFT_Buffer2[FFT_SIZE];
// 2-for-1 path: signal 1 goes into the real parts and signal 2 into the
// imaginary parts (see combine_buffers()); transformed in place.
cplx Combined_FFT_Buffer[FFT_SIZE];
// The two spectra recovered from Combined_FFT_Buffer by extract_buffers().
cplx Two_For_One_FFT_Buffer1[FFT_SIZE];
cplx Two_For_One_FFT_Buffer2[FFT_SIZE];
void setup() {
while(!Serial);
delay(1000);
Serial.println("Two for one Real FFT test");
// standard FFT setup
arm_cfft_radix4_init_q15(&fft_inst, FFT_SIZE, 0, 1);
// -------------------------------------------------------------------------
// fill standard FFT buffers, these are later interleaved into the "Combined_FFT_Buffer"
for (int i = 0; i < FFT_SIZE; i++) {
__real__ Standard_FFT_Buffer1[i] = random(-0x7FFF, 0x7FFF);
__real__ Standard_FFT_Buffer2[i] = random(-0x7FFF, 0x7FFF);
__imag__ Standard_FFT_Buffer1[i] = 0;
__imag__ Standard_FFT_Buffer2[i] = 0;
}
elapsedMicros time = 0;
// -------------------------------------------------------------------------
// interleave the two buffers into real and imaginary parts of the "Combined_FFT_Buffer"
// Standard_FFT_Buffer1 -> real
// Standard_FFT_Buffer2 -> imag
combine_buffers(Standard_FFT_Buffer1, Standard_FFT_Buffer2, Combined_FFT_Buffer, FFT_SIZE);
// -------------------------------------------------------------------------
// perform 2 for 1 FFT
arm_cfft_radix4_q15(&fft_inst, (q15_t*)Combined_FFT_Buffer);
// -------------------------------------------------------------------------
// extracts the two Real val FFT's from the one complex FFT output
extract_buffers(Combined_FFT_Buffer, Two_For_One_FFT_Buffer1, Two_For_One_FFT_Buffer2, FFT_SIZE);
uint32_t t1 = time;
time = 0;
// -------------------------------------------------------------------------
arm_cfft_radix4_q15(&fft_inst, (q15_t*)Standard_FFT_Buffer1);
// -------------------------------------------------------------------------
arm_cfft_radix4_q15(&fft_inst, (q15_t*)Standard_FFT_Buffer2);
// -------------------------------------------------------------------------
uint32_t t2 = time;
Serial.println();
Serial.printf("2 For One FFT Time:\t%i\n", t1);
Serial.printf("Two Standard FFTs Time:\t%i\n\n", t2);
Serial.println("Test to see if the 2 for 1 FFT gives the same output as two standard Real FFTs");
Serial.println();
Serial.println("Standard_FFT_Buffer1 and Two_For_One_FFT_Buffer1");
Serial.println();
Serial.println("Standard_FFT_|_2_For_1_FFT__||_Standard_FFT_|_2_For_1_FFT_");
for (int i = 0; i < FFT_SIZE; i++) {
Serial.printf("real: %6i | ", __real__ Standard_FFT_Buffer1[i]);
Serial.printf("real: %6i || ", __real__ Two_For_One_FFT_Buffer1[i]);
Serial.printf("imag: %6i | ", __imag__ Standard_FFT_Buffer1[i]);
Serial.printf("imag: %6i\n", __imag__ Two_For_One_FFT_Buffer1[i]);
}
Serial.println("-----------------------------------------------------------");
// -------------------------------------------------------------------------
Serial.println();
Serial.println("Standard_FFT_Buffer2 and Two_For_One_FFT_Buffer2");
Serial.println();
Serial.println("Standard_FFT_|_2_For_1_FFT__||_Standard_FFT_|_2_For_1_FFT_");
for (int i = 0; i < FFT_SIZE; i++) {
Serial.printf("real: %6i | ", __real__ Standard_FFT_Buffer2[i]);
Serial.printf("real: %6i || ", __real__ Two_For_One_FFT_Buffer2[i]);
Serial.printf("imag: %6i | ", __imag__ Standard_FFT_Buffer2[i]);
Serial.printf("imag: %6i\n", __imag__ Two_For_One_FFT_Buffer2[i]);
}
Serial.println("-----------------------------------------------------------");
}
// Intentionally empty: the whole test runs once in setup(), so there is
// nothing left to do on each pass of the main loop.
void loop() {}
//----------------------------------------------------------------
// Packs two real-valued signals into a single complex buffer so that one
// complex FFT can transform both at once.
//
//   buf1     - first signal; only its real parts are read
//   buf2     - second signal; only its real parts are read
//   out_buf  - receives buf1's real parts as real and buf2's real parts as imag
//   fft_size - number of elements to pack from each input
void combine_buffers(cplx *buf1, cplx *buf2, cplx *out_buf, uint16_t fft_size) {
  uint16_t sample = 0;
  while (sample != fft_size) {
    // Write order (real first, then imag) is kept deliberately so the result
    // is unchanged even if the caller passes overlapping buffers.
    __real__ out_buf[sample] = __real__ buf1[sample];
    __imag__ out_buf[sample] = __real__ buf2[sample];
    ++sample;
  }
}
//----------------------------------------------------------------
void extract_buffers(cplx *fft_buf, cplx *buf1, cplx *buf2, uint16_t fft_size) {
__real__ buf1[0] = __real__ fft_buf[0];
__imag__ buf1[0] = 0;
__real__ buf2[0] = __imag__ fft_buf[0];
__imag__ buf2[0] = 0;
for (int i = 1; i < fft_size / 2; i++) {
q15_t x1r = (__real__ fft_buf[i] + __real__ fft_buf[fft_size - i]) >> 1;
q15_t x1i = (__imag__ fft_buf[i] - __imag__ fft_buf[fft_size - i]) >> 1;
q15_t x2r = (__imag__ fft_buf[i] + __imag__ fft_buf[fft_size - i]) >> 1;
q15_t x2i = -(__real__ fft_buf[i] - __real__ fft_buf[fft_size - i]) >> 1;
__real__ buf1[i] = x1r;
__imag__ buf1[i] = x1i;
__real__ buf1[fft_size - i] = x1r;
__imag__ buf1[fft_size - i] = -x1i;
__real__ buf2[i] = x2r;
__imag__ buf2[i] = x2i;
__real__ buf2[fft_size - i] = x2r;
__imag__ buf2[fft_size - i] = -x2i;
}
__real__ buf1[fft_size / 2] = (__real__ fft_buf[fft_size / 2]);
__imag__ buf1[fft_size / 2] = 0;
__real__ buf2[FFT_SIZE / 2] = __imag__ fft_buf[fft_size / 2];
__imag__ buf2[FFT_SIZE / 2] = 0;
}
Last edited: