D
DeletedUser
Guest
I was interested in finding out what the differences are between using the standard Teensy biquad library and using a direct form 1 32x32 MAC for a 6th order LPF at fc=1000Hz.
Enclosed, find the results.
In Conclusion, I found that:
1. There is an improvement to the frequency response (except for a -90 dB DC component).
2. There are 86.30 cycles/sample or 5.75 cycles/MAC
My questions are:
1. How can I remove the DC component (which I believe is due to the fact that we lose 1 bit of information because rshift32 shifts by 32 instead of by 31)?
2. I would like to know what the number of cycles per sample is for the standard Teensy biquad library. (I don't have access to probe the ARM_DWT_CYCCNT at its input in my sketch).
Enclosed, find the results.
In Conclusion, I found that:
1. There is an improvement to the frequency response (except for a -90 dB DC component).
2. There are 86.30 cycles/sample or 5.75 cycles/MAC
My questions are:
1. How can I remove the DC component (which I believe is due to the fact that we lose 1 bit of information because rshift32 shifts by 32 instead of by 31)?
2. I would like to know what the number of cycles per sample is for the standard Teensy biquad library. (I don't have access to probe the ARM_DWT_CYCCNT at its input in my sketch).
Code:
#include <Audio.h>
#include <Wire.h>
#include <SPI.h>
#include <SD.h>
#include <SerialFlash.h>
#include "AudioSampleSig1and3kHz.h"
#include "AudioSampleSig1and3kHz.h"
// Uncomment to enable the Serial port and the periodic cycle-count report
// (see the #ifdef DEBUG sections in setup() and loop()).
//#define DEBUG

// ---- Audio objects ----
// sound0  : memory sample player; loop() (re)starts it with AudioSampleSig1and3khz.
// biquad1 : library biquad filter, configured with three stages in setup().
// queue1  : record queue that hands the unfiltered signal to the sketch.
// queue2  : play queue that receives the output of myTeensyBiq().
AudioPlayMemory sound0;
AudioOutputUSB usb_out;
AudioFilterBiquad biquad1;
AudioOutputI2S i2s2;
AudioRecordQueue queue1;
AudioPlayQueue queue2;

// ---- Connections ----
// The same source feeds both filters so their outputs can be compared:
// the library biquad goes to channel 0 and the custom filter to channel 1
// of both the I2S and the USB outputs.
AudioConnection patchCord1(sound0, 0, biquad1, 0);
AudioConnection patchCord2(sound0, 0, queue1, 0);
AudioConnection patchCord3(biquad1, 0, i2s2, 0);
AudioConnection patchCord4(queue2, 0, i2s2, 1);
AudioConnection patchCord5(biquad1, 0, usb_out, 0);
AudioConnection patchCord6(queue2, 0, usb_out, 1);
AudioControlSGTL5000 sgtl5000_1; //xy=523.2000122070312,327

// Input selection is currently disabled; the sketch only plays the memory sample.
//const int myInput = AUDIO_INPUT_LINEIN;
//const int myInput = AUDIO_INPUT_MIC;
//------------------------IIR parameters ---------------
//6th order iir lpfilter at 1100Hz (option 2 of calcfilt.m)
// NOTE(review): the accompanying text quotes fc=1000Hz - confirm which value
// the included coefficient files were actually generated for.
//-------- for standard biquad library ----------------
// One coefficient set per biquad stage; each is passed to
// biquad1.setCoefficients() in setup(). The values live in the included
// headers, which are not part of this file.
double coefs0[]=
{
#include "iircoefs0.h"
};
double coefs1[]=
{
#include "iircoefs1.h"
};
double coefs2[]=
{
#include "iircoefs2.h"
};
//--------- for myTeensyBiq ------------------
// Integer numerator / denominator coefficients, read by myTeensyBiq() three
// entries per cell (indices coefnum, coefnum+1, coefnum+2).
// deni[coefnum] itself is never read by myTeensyBiq().
int32_t numi[]=
{
#include "numi.h"
};
int32_t deni[]=
{
#include "deni.h"
};
// Number of cascaded second-order cells processed by myTeensyBiq().
#define NUMCELLS 3
// Filter state kept between blocks:
//   Dni - the two most recent input samples (newest first)
//   Ddi - two delayed outputs per cell (newest first), NUMCELLS cells
int32_t Dni[2],Ddi[2*NUMCELLS];
#ifdef DEBUG
// last_time - millis() timestamp of the previous debug report
// cyc       - cycle count measured around the latest myTeensyBiq() call
// nMACs     - MAC count per block used to derive cycles/MAC
unsigned long last_time = millis();
int cyc=0,nMACs;
#endif
//------------------------------------------------------
// One-time initialisation: optional debug serial port, audio memory,
// codec bring-up, library biquad coefficients and the record queue.
void setup() {
#ifdef DEBUG
  Serial.begin(115200);
  delay(500);
#endif

  AudioMemory(12);

  sgtl5000_1.enable(); // Enable the audio shield
  // sgtl5000_1.inputSelect(myInput);
  sgtl5000_1.volume(0.5);

  // Load the library biquad, one coefficient table per stage, in stage order.
  double *const stageCoefs[] = { coefs0, coefs1, coefs2 };
  const int stageCount = sizeof(stageCoefs) / sizeof(stageCoefs[0]);
  for (int stage = 0; stage < stageCount; stage++) {
    biquad1.setCoefficients(stage, stageCoefs[stage]);
  }

  // Start the record queue so loop() receives blocks to filter.
  queue1.begin();
}
// Main loop: keeps the test signal playing, runs every captured block
// through myTeensyBiq() and queues the result for output. With DEBUG
// defined it also reports the measured cycle cost every 2.5 seconds.
void loop() {
  // Restart the sample as soon as it has finished playing.
  if (!sound0.isPlaying()) {
    sound0.play(AudioSampleSig1and3khz);
  }

  if (queue1.available() > 0) { //Left Channel
#ifdef DEBUG
    // Timed region starts before the buffers are fetched, so the
    // measurement covers the buffer fetches as well as the filter call.
    cyc = ARM_DWT_CYCCNT;
#endif
    myTeensyBiq(queue1.readBuffer(), queue2.getBuffer(),
                numi, deni, Dni, Ddi, NUMCELLS);
#ifdef DEBUG
    cyc = ARM_DWT_CYCCNT - cyc;
#endif

    // Release the captured block, then hand the filtered block to the play queue.
    queue1.freeBuffer();
    queue2.playBuffer();
  }

#ifdef DEBUG
  if (millis() - last_time >= 2500) {
    nMACs = AUDIO_BLOCK_SAMPLES * NUMCELLS * 5;
    const float cyclesPerSample = (float)cyc / (float)AUDIO_BLOCK_SAMPLES;
    const float cyclesPerMac = (float)cyc / (float)nMACs;

    Serial.print(" myTeensyBiq cycles/sample=");
    Serial.print(cyclesPerSample);
    Serial.print(" cycles/MAC=");
    Serial.println(cyclesPerMac);

    last_time = millis();
  }
#endif
}
// myTeensyBiq - filter one audio block through a cascade of second-order
// (biquad) cells using 32-bit integer multiply-accumulates.
//
//   inbuf    AUDIO_BLOCK_SAMPLES 16-bit input samples (read only)
//   outbuf   AUDIO_BLOCK_SAMPLES 16-bit output samples (written)
//   numi     numerator coefficients, 3 per cell
//   deni     denominator coefficients, 3 per cell; the first of each
//            triple is never read, the other two are applied negated
//   Dni      2 words of input history, updated in place
//   Ddi      2 words of output history per cell, updated in place
//   numcells number of cascaded cells
//
// The delayed outputs of one cell double as the delayed inputs of the next,
// so only the first cell needs separate input history (Dni).
//
// The middle coefficient of every numerator and denominator is accumulated
// twice. Presumably numi.h / deni.h store that coefficient halved so that it
// fits the 32-bit word - TODO confirm against the coefficient generator.
//
// Each accumulated result is doubled before it is stored as history and
// shifted right by 15 to form the 16-bit output; this assumes the rshift32
// multiplies leave the sum one bit below the input scaling.
void myTeensyBiq(int16_t *inbuf,int16_t *outbuf,int32_t *numi,int32_t *deni,int32_t *Dni,int32_t *Ddi,int numcells)
{
  const int16_t *src = inbuf;
  int16_t *dst = outbuf;

  for (int j = 0; j < AUDIO_BLOCK_SAMPLES; j++)
  {
    // Move the sample into the upper 16 bits. A multiply is used instead of
    // "<<16" because left-shifting a negative value is undefined before C++20;
    // the result is the same and always fits in 32 bits.
    const int32_t input = static_cast<int32_t>(*src++) * 65536;

    int coefnum = 0;   // index of the current cell's first coefficient
    int datanum = 0;   // index of the current cell's newest history word

    // Numerator of the first cell, fed from the input history.
    int32_t accu = multiply_32x32_rshift32_rounded(input, numi[coefnum]);
    accu = multiply_accumulate_32x32_rshift32_rounded(accu, Dni[datanum], numi[coefnum+1]);
    accu = multiply_accumulate_32x32_rshift32_rounded(accu, Dni[datanum], numi[coefnum+1]);
    accu = multiply_accumulate_32x32_rshift32_rounded(accu, Dni[datanum+1], numi[coefnum+2]);
    Dni[datanum+1] = Dni[datanum];
    Dni[datanum] = input;

    for (int i = 1; i < numcells; i++)
    {
      // Denominator (feedback) of the current cell.
      accu = multiply_accumulate_32x32_rshift32_rounded(accu, Ddi[datanum], -deni[coefnum+1]);
      accu = multiply_accumulate_32x32_rshift32_rounded(accu, Ddi[datanum], -deni[coefnum+1]);
      accu = multiply_accumulate_32x32_rshift32_rounded(accu, Ddi[datanum+1], -deni[coefnum+2]);

      // Double the sum to get this cell's output. The shift is done on the
      // unsigned representation so the bit pattern matches "accu<<1" without
      // the undefined behaviour of shifting a negative signed value.
      const int32_t res = static_cast<int32_t>(static_cast<uint32_t>(accu) << 1);

      // Numerator of the next cell: its input history is the history of the
      // cell just finished, which has not been shifted yet.
      coefnum += 3;
      accu = multiply_32x32_rshift32_rounded(res, numi[coefnum]);
      accu = multiply_accumulate_32x32_rshift32_rounded(accu, Ddi[datanum], numi[coefnum+1]);
      accu = multiply_accumulate_32x32_rshift32_rounded(accu, Ddi[datanum], numi[coefnum+1]);
      accu = multiply_accumulate_32x32_rshift32_rounded(accu, Ddi[datanum+1], numi[coefnum+2]);

      // Only now age the finished cell's history and store its new output.
      Ddi[datanum+1] = Ddi[datanum];
      Ddi[datanum] = res;
      datanum += 2;
    }

    // Denominator (feedback) of the last cell.
    accu = multiply_accumulate_32x32_rshift32_rounded(accu, Ddi[datanum], -deni[coefnum+1]);
    accu = multiply_accumulate_32x32_rshift32_rounded(accu, Ddi[datanum], -deni[coefnum+1]);
    accu = multiply_accumulate_32x32_rshift32_rounded(accu, Ddi[datanum+1], -deni[coefnum+2]);
    Ddi[datanum+1] = Ddi[datanum];
    Ddi[datanum] = static_cast<int32_t>(static_cast<uint32_t>(accu) << 1);

    // Round to nearest instead of truncating: a bare ">>15" always rounds
    // down, which biases every output sample by about half an LSB and shows
    // up as a small DC offset. Adding bit 14 after the shift rounds without
    // risking overflow of the 32-bit sum.
    int32_t sample = (accu >> 15) + ((accu >> 14) & 1);

    // Saturate instead of wrapping if the filter overshoots full scale.
    if (sample > 32767)
    {
      sample = 32767;
    }
    else if (sample < -32768)
    {
      sample = -32768;
    }
    *dst++ = static_cast<int16_t>(sample);
  }
}