Forum Rule: Always post complete source code & details to reproduce any issue!

During the thread on megasample collection with the T4X https://forum.pjrc.com/threads/60299...-this-possible we discussed various ways to evaluate noise and the best estimate of a stable signal on recorded ADC data. There were proponents of Pk-Pk noise, mean and standard deviation and median. I decided to evaluate the possibilities and results with both simulated normally-distributed noise and with real-world ADC data. I found that in most cases the mean and median were the same as a measure of central tendency for almost all the case with 12-bit data.

To accomplish this testing, I wrote the attached application that I have been running on a T4.1. The app has the following features:

* You can vary the ADC sampling and conversion speed.
* You can switch between ADC data and simulated data. With simulated data, what used to control ADC sampling sets the standard deviation of the simulated data.
* As part of the results there is a display of histogram data from the data set.
* There is a very fast median calculator---about 10X the speed of calculating the median by selecting the middle entry in quicksorted data.
* There is a very remedial plotting function that you can use with the Arduino Serial Plotter to plot the histogram data.
* You can add fake noise spikes to the data to see how they affect the statistics.

Here is the code:

Code:
```/******************************************************************
Program to demonstrate some sampling statistics to evaluate noise
Included is a fast algorithm for calculation of median and histogram
data for 12-bit integer ADC counts.

You can also add fake noise spikes to the data to see the effect
on the mean, median, std. deviation and histogram

M. Borgerson  8/18/2020
*********************************************************************/

const int ledpin    = 13;

#define SIMMEAN 2200 // asumed mean for simulated data
#define SPIKEAMPLITUDE 20  // spike amplitude in counts--spikes always positive
#define SPIKEINTERVAL 100  // number of samples between spikes

const char compileTime [] = "T4.1 Noise Measurement Test Compiled on " __DATE__ " " __TIME__;
//NOTE:  If you less than 65535 samples, you should change the bins array to uint16_t
//       to save some space.  Teensies have lots of memory, so 16KB for bins isn't an issue.
//       However, if you collect 16-bit data, 65536 bins might be a problem.

// if you want a LOT of samples on a T4.1 with PSRAM increase this to 4 million and put
// idata in EXTMEM
#define MAXVALS 40000
#define MAXBINS 4096

#define  LEDON digitalWriteFast(ledpin, HIGH);
#define  LEDOFF digitalWriteFast(ledpin, LOW);

uint16_t idata[MAXVALS];
uint32_t bins[MAXBINS];  // sufficient for up to LOTS of 12-bit data

// instantiate a new ADC object
bool simdata = true;
bool noisespikes = false;
void setup() {
pinMode(ledpin, OUTPUT);
pinMode(A0, INPUT_DISABLE); // disable digital keeper resistors

Serial.begin(9600);
delay(500);  // wait for Serial to open
Serial.println();
Serial.println(compileTime);
Serial.printf("CPU Frequency: %lu\n", F_CPU_ACTUAL);
srand(micros());  // use micros() to set seed

}

void loop() {
char ch;
uint16_t cval;
if (Serial.available()) {
switch (ch) {
case 'b' :  // for checking serial plotter input
LEDON
delay(1000);
LEDOFF
break;
case 's' :
Serial.println("Simulated data will be used.");
simdata = true;

break;
case 'a' :
Serial.println("Analog data will be used.");
simdata = false;
break;
case 'n' :
noisespikes = !noisespikes;
Serial.print("Noise spikes are " );
if (noisespikes) {
Serial.println(" ON");
} else {
Serial.println(" OFF");
}

break;

case '1' :
case '2' :
case '3' :
case '4' :
case '5' :
cval = ch & 0x07;
GenData(idata, MAXVALS, cval); // cval controls std for simulation, or ADC speed
HistoStats(idata, MAXVALS);
break;
case 'g'  :
HistoGraph(bins);
LEDON
delay(100);
HistoGraph(bins);
LEDOFF
break;
} // end of switch(ch)
} // end of if(Serial.available....
}  // end of loop()

// send bin data to Serial() so that Serial Minitor can plot it
// Doesn't plot any zero values, so low-count tails may be messed up
#define BINWIDTH 10
void HistoGraph(uint32_t gbins[]) {
uint16_t i, j;
// Serial.println("You have 10 seconds to switch to graph mode <Ctl-shift-L>)");
delay(10000);
for (i = 0; i < MAXBINS; i++) {
if (gbins[i] > 0) {
for (j = 0 ; j < BINWIDTH; j++) {
Serial.println(gbins[i]);
delay(10);
}
} // end of if(gbins[i] > 0)
} // end of for(i=0...
}

// Do some stats on values in idat
void HistoStats(uint16_t idat[], uint32_t numvals) {
uint32_t startmicro, dmicro;
uint16_t  median, imin, imax;
float mean, std;
Serial.println("\nComputing statistics.");
delay(5);

MeanEtc(idat, numvals, &mean, &std, &imin, &imax);
Serial.printf("\nmean : % 8.4f ", mean);
Serial.printf("std. dev:  % 8.4f   ", std);
Serial.printf("min = % u   max = % u\n \n", imin, imax);

Serial.println();
startmicro = micros();
median = binMedian(idat, numvals);
dmicro = micros() - startmicro;
Serial.printf("%lu sample bin Median    = % u  ", numvals, median);
Serial.printf("  took % 4lu microseconds\n", dmicro);

startmicro = micros();
qsort(idat, numvals, sizeof(uint16_t), cmpfunc);
median = idat[numvals / 2];
dmicro = micros() - startmicro;
Serial.printf("%lu sample QSort Median  = % u  ", numvals, median);
Serial.printf("  took % 4lu microseconds\n\n", dmicro);

// Now show histogram from bins data
ShowHisto(bins, imin, imax);
}

void GenData(uint16_t dat[], uint32_t nvals,  uint16_t istd) {
if (simdata) {
SimGenData(dat, nvals, SIMMEAN,  istd);
} else {
ADCGenData(dat,  nvals,  istd); // mean set by ADC input, istd controls speed setting
}
}

// do the  easy stats
void MeanEtc(uint16_t dat[], uint32_t nvals, float *mnptr, float *stptr, uint16_t *minp, uint16_t *maxp) {
uint32_t i;
uint16_t imin, imax, ival;
float  var, fv, s1, s2, mean;
double  sum, sumsq; // with 4 million samples, even 32-bit floats have rounding problems!
// NOTE SUMSQ gets LARGE: suppose the values are ~4000 and you sum 100,000 of them then the sumsq
//       is about (4x10^3) * (4x10^3)  * (1x10^5)   or 1.6x10^12 which is way too big for a uint32_t
sum = 0.0;
sumsq = 0.0;
imin = 65535;
imax = 0;
// first get sum and calculate mean
for (i = 0; i < nvals; i++) {
ival = dat[i];
if (ival < imin) imin = ival;
if (ival > imax) imax = ival;
sum += (float)ival;
sumsq += (float)ival * (float)ival;
}
mean = sum / nvals;
// now rcalculate variance in tw-pass algorithm to minimize roundoff error
s1 = 0; s2 = 0;
for (i = 0; i < nvals; i++) {
fv = (float)dat[i] - mean;
s1 += (fv * fv );
s2 += fv;
}
var = (s1 - s2 * s2 / nvals) / (nvals - 1);
*mnptr = mean;
//Serial.printf("Variance: %8.3ef  s1: %8.3e  s2^2/n: %8.3e\n", var, s1, s2*s2/nvals);
*stptr = sqrt(var);
*minp = imin;
*maxp = imax;
}

// Since this is aimed at evaluating noise of 12-bit ADC data,
// the generated numbers are constrained to values from 0 to 4095
// The numbers are also constrained to mean +/- 4 *std dev
void SimGenData(uint16_t dat[], uint32_t nvals,  uint16_t imean, uint16_t istd) {
float  x, fmean, fstd;
uint16_t ival;
uint32_t i;
fmean = imean;
fstd = istd;

if (nvals > MAXVALS) nvals = MAXVALS; // stay within targt array
Serial.println("\n\nGenerating Random Normally distributed data.");
for (i = 0; i < nvals; i++) {
LEDON
do {
x = fastrnorm(fmean, fstd);
ival = floor(x);
// if noisespikes are on add a spike every 100 samples
if (ival > MAXBINS - 1) badresult = true;
if (ival > (fmean + 6 * fstd)) badresult = true;
if (ival < (fmean - 6 * fstd)) badresult = true;

} while (badresult);  // fall through when result in bounds
if (noisespikes) {
if ((i % SPIKEINTERVAL) == (SPIKEINTERVAL - 1)) {
ival += SPIKEAMPLITUDE;
}
}
if (ival > MAXBINS - 1) ival = MAXBINS - 1; // even spikes can get clipped!
LEDOFF

dat[i] = ival;
}  // end of for(i=0...

}

// Collect ADC Data from A0.  dmean is ignored.  dstd
// controls ADC speed settings.  No fancy timing control--
// just collect analog data as fast as analogRead will
// provide it.
void ADCGenData(uint16_t dat[], uint32_t nvals, uint16_t spd) {
uint32_t i, startmicro, dmicro;
uint16_t ival;
startmicro = micros();
if (nvals > MAXVALS) nvals = MAXVALS;
LEDON
for (i = 0; i < nvals; i++) {
// if noisespikes are on add a spike every 100 samples
if (noisespikes) {
if ((i % SPIKEINTERVAL) == (SPIKEINTERVAL - 1)) {
ival += SPIKEAMPLITUDE;
}
}
if (ival > 4095) ival = 4095;
dat[i] = ival;
}  // end of for(i=0...

LEDOFF
dmicro = micros() - startmicro;
Serial.printf("Collecting % lu  samples took % 4lu microseconds\n", nvals, dmicro);
}

// Set ADC Speed  from low to very high

switch (spd) {
case 1:
Serial.println("ADC Speed set to 12 - bit LOW_SPEED.");
break;
case 2:
Serial.println("ADC Speed set to 12 - bit MED_SPEED.");
break;
case 3:
Serial.println("ADC Speed set to 12 - bit HIGH_SPEED.");
break;
case 4:
Serial.println("ADC Speed set to 12 - bit VERY_HIGH range.");
break;
case 5:
Serial.println("ADC Speed set to 8 - bit HIGH_SPEED.");
break;
}
}

// finds median without recursion, comparison or sorting
// takes about 140mSec to find median of 100,000 records
uint16_t binMedian(uint16_t dat[], uint32_t numsamp) {
uint32_t i, bsum;
uint16_t val;

// empty the bins--they're on the stack and have random values
memset(bins, 0, sizeof(bins));
//  Serial.printf("In binMedian, address of dat is % p\n", &dat);
//   delay(5);
for (i = 0; i < numsamp; i++) {
val = dat[i];

if (val < MAXBINS) { // prevent writing past end of bins array
bins[val]++;
}

} // bins are filled
i = 0; bsum = 0;
do {
bsum += bins[i];
i++;
}  while (bsum < numsamp / 2);

return i - 1;
}

void ShowHisto(uint32_t bins[], uint16_t imin, uint16_t imax) {
uint16_t i, lnum;
lnum = 0;
Serial.print("Histogram Data");
for (i = imin - 1; i < imax + 2; i++) {
if ((lnum % 10) == 0) Serial.printf("\n % 4d: ", i);
lnum++;
Serial.printf(" % 5lu ", bins[i]);
}
Serial.println();
}

// These are the data collection and simulation routines
inline int cmpfunc (const void * a, const void * b) {
uint16_t ia = *(uint16_t *)a;
uint16_t ib = *(uint16_t *)b;
return ( ia - ib);
}

// returns a random floating point value between 0.0 and 1.0
// we use the built-in rand() function for speed as we are
// not particularly picky about the characteristics of the number
float randf(void) {
float val;
val = (float)rand() / ((float)(RAND_MAX) - 0.5);
return val;
}
// convert floating rnd to normal curve
float fastrnorm(float mean, float stdDev)
{
static float spare;
static float u1;
static float u2;
static float s;

{
return ((spare * stdDev) + mean);
} else {
do {
u1 = (randf() * 2) - 1;
u2 = (randf() * 2) - 1;
s = (u1 * u1) + (u2 * u2);
} while (s >= 1.0);
s = sqrtf(-2.0 * logf(s) / s);
spare = u2 * s;
return (mean + (stdDev * u1 * s));
}
}```

2. In my T4.0 setup, median results in a peak-to-peak of 9 counts and mean results in 38 counts. So in some cases, it makes a huge difference. Noise isn't always Gaussian - try both mean and median for any specific situation.

3. I'm not sure what your numbers mean. Mean and median are single numbers and different from peak-to-peak. Is 9 the peak-to-peak value or the median? Did you calculate the mean and median from the same data set? If so, the peak-to-peak number for both mean and median calculations should be the same--the difference between the largest and smallest numbers in the data set.

How did you get the values for the data set? If it was from an ADC pin, what was the signal input and what was the value as measured with a voltmeter?

If you are measuring a signal very close to zero, I would expect the distribution to be non-Gaussian, as all numbers below zero would come up zero----the curve would be cut off at zero on the left and might extend 20 or thirty counts on the right.

4. It makes sense when oversampling. For example, take 32 raw samples, take the median of these 32, treat that as a single derived sample. Take 1000 of the derived samples and find the peak-to-peak value. Repeat with mean instead of median.

This is real ADC data from a simple voltage divider producing 1.04020V on a T4.0. For consistency with T3 testing, I extend all data to 16 bits (so about 21,000 counts).

I've never seen median under-perform mean. So if speed or memory isn't an issue, I'd always use median.

5. A few more questions:

Were your numbers for P-P on mean and median taken after you multiplied by 16 to make the data 16 bits? If so, your P-P figures of 9 and 38 counts should be divided by 16 to get the P-P numbers from the 12-bit ADC.

Do you do your mean and median calculations in integer arithmetic or floating point. Lots of people use floating point and oversampling to try to get better resolution sometimes with deceptive results

For large samples (N > 1000), mean and median seem to give equivalent results in LSBs at 12-bit resolution. For small samples (N = 32), it doesn't surprise me that the mean and median would be different---particularly if you are multiplying by 16 before doing the calculations.

If you are working with 16-bit numbers--with a sample size o 32--the bin median algorithm takes a lot more memory for the bins. With that small a sample the speed difference between O(N) for bin median and O(NlogN) for standard median algorithms should be small.

It would be easier for me to understand your numbers if you would post code---or at least some pseudopod--to show your algorithm.

NOTE: if you're taking 32 samples at 10 microseconds each in a burst, then waiting 5 milliseconds before the collecting the next group, you have to be very careful to have low output impedance on your sensor if there is significant capacitance at the ADC input. In situations like that, the first sample of the 32 can be higher by 10 to 20 bits. A similar problem can occur if you have an intermittent noise spike that gets captured in one of your 32 samples. In a case like that the median will generally be a better estimate than the mean, as it will pretty much ignore the noise spike. If the mean of the sample is 1000 counts, the standard deviation about 3 counts, and the noise spike 64 counts, it will increase the mean of 32 samples by 2 counts but have less than one bit influence on the median.

This paragraph from measuring.com seems to support your use of the median for sample size 32: https://measuringu.com/small-n/

Average Time: One long task time can skew the arithmetic mean and make it a poor measure of the middle. In such situations, the median is a better indicator of the typical or “average” time. Unfortunately, the median tends to be less accurate and more biased than the mean when sample sizes are less than about 25. In these circumstances, the geometric mean (average of the log values transformed back) tends to be a better measure of the middle. When sample sizes get above 25, the median works fine.
The "One long task time" seems equivalent to one noise spike in a sample.

6. Thanks mborgerson for the median algorithm. I will sure have a use for that once I understand how it works. I was just browsing Wikipedia and saw something about the "mode" of a data set. This is the value that appears most often in the set. Gives more reliable values when the data is skewed.

https://en.wikipedia.org/wiki/Mode_(statistics)

I was thinking that this could be useful for ADC values. The computations are a little more involved and may be too slow (on a Teensy? Are you kidding?).

7. It would be very simple and fast to find the mode once you have sorted the data into the bins with binMedians()
Code:
```modeval = 0;
maxbinval = 0;
for(i=0; i<numbins; i++){  // find index of bin with highest number of counts. Save that bin number as modeval
if(bins[i] > maxbinval){
modeval = i;
maxbinval = binvals[i];
}
}```
For pretty quiet input values, there is some probability that the two highest bin counts would be the same. Modeval will return the first bin index with that number of counts.

8. I get much better results with median, even with 3 sample oversampling. It is a burst of samples, but I've checked - there is no "first sample is different" bias.

I should do a FFT on the data to get more insight about the non-Gaussian noise. But it might be too high frequency.

I don't advise doing analysis of over sampled data near the limits of resolution - use floating point. Or extend the data to something more than your resolution of interest.

9. Originally Posted by jonr
Or extend the data to something more than your resolution of interest.
How do you extend the resolution of 12-bit data?

If you multiply the data by 16, to get 16-bit data, you have a wider range between the min and max, but you've also multiplied the quantization error by 16. The system still puts out only 4096 discrete values.

If you take the strict definition of the median you could get 2000.5 as a median instead of 2000 for an even number of samples. However, that really doesn't add a lot of resolution.

If you take the mean of many samples you can get better resolution. However extending the resolution from 12 to 16 bits means oversampling by a factor of 256. However for that to work, you need have white noise at the appropriate level at the inputs.

If you have Gaussian noise at the input, averaging might not help much, since for Gaussian noise over something like 32 samples, mean = median = mode to within 1 LSB.

When you find large differences between mean, median, and mode in a small sample set, I think it means that you have either a smooth, but lopsided distribution curve or you have one or more spikes in the signal which are predominantly either positive opt negative. Median and mode are better measures of central tendency in those cases---although neither increases the resolution of the data. Instead, they act to reduce the noise level in relation to the signal level.

One of the best arguments for oversampling used to be that it improved the signal-to-noise ratio without increasing the storage requirements. However with the speed memory and storage capability of something like the T4.1, if you need a good signal at 10KHz, you could just sample at 40KHz and store everything (80KBytes/second means you can have two buffers with 100 seconds of data each in the PSRAM of a T4.1).
Then you write out a buffer's worth to SD every 100 seconds. When your sampling run is done, you import the data into Matlab and use the decimate function to digitally filter the data and reduce it to 10KSamples/second. You can also do a spectral analysis of the data if you have sampled at reasonably precise 25-microsecond intervals. That's well within the capabilities of the T4.x ADC when triggered by an interval timer.

The noise source that concerns me most is that caused by the large current spikes drawn by SD cards when they erase blocks. These intermittent 150mA spikes can pull down the T4.1 3.3V supply by as much as several 10s of millivolts. If you have a stable input signal which is not radiometric to that 3.3V supply, you will see a positive spike in the T4.x ADC data as it's reference voltage is reduced. You can reduce the impact of these current spikes by using an external ADC with a good reference or by switching to a T3.X and setting up the ADC to use the internal 1.2V reference.

10. To get from 12 bits to 16, I multiply by 16 and add dither.

Do you have a suggestion for the best way to analyze median vs mean when used with over-sampling as I've described?

What results do you get (offset, peak-to-peak and std-dev) when using median vs mean with over-sampling in a T4.x?

I find that with random noise (generated in software), mean outperforms median. But apparently that is not what the teensy ADC produces.

11. ## Might be off-topic but... something to see!

I've been working most of the day on a program to demonstrate filtering ADC values using the statistical mode. This is what I came up with :
Code:
```/*
* Filtering noise from ADC by finding statistical mode of
*
* It prints the raw values from ADC0 then the mode, the value that appears
* most often.
*
* Open the IDE Serial Plotter for an interesting display!
*/

const uint16_t DATA_LENGTH = 20;

// fill the array with zeros (makes sort unnecessary)
void fillArray( uint16_t data[], uint16_t n ) {
for ( int i = 0; i < n; i++ )
data[ i ] = 0;
}

// insert a new value while keeping the array sorted
// discard the highest or lowest item
void insertSorted( uint16_t data[], uint16_t n, uint16_t newValue ) {
// start searching from the bottom of the array
for ( int i = 0; i < n; i++ ) {
if ( newValue <= data[ i ] ) {
// move up all values above this one
// we must start from the top of the array
// (the highest value is discarded)
for ( int j = n - 1; j > i; j-- ) {
data[ j ] = data[ j - 1 ];
}
// insert new value
data[ i ] = newValue;
return;
}
}
// if we get here it means that newValue is greater than all
// the values in the array so...
// move down all values
// we must start from the bottom of the array
// (the lowest value is discarded)
for ( int i = 0; i < n; i++ ) {
data[ i ] = data[ i + 1 ];
}
// insert new value at the end
data[ n - 1 ] = newValue;
}

// finds the mode, the value that appears most often in the array
// note : the array must be sorted
// mode algorithm adapted from PaulMurrayCbr in Arduino forum
uint16_t modeValue( uint16_t data[], uint16_t n, uint16_t newValue ) {
insertSorted( data, n, newValue );

// look for mode value
int highCount = 0;
int bestValue = -1;
int count = 0;
int value = -1;
for ( int i = 0; i < n; i++ ) {
if ( data[ i ] != value ) {
value = data[ i ];
count = 0;
}
count++;
if ( count > highCount ) {
highCount = count;
bestValue = value;
}
}
// if no mode is found
if ( highCount <= 1 ) {
return data[ n / 2 ];
} else {
return bestValue;
}
}

// print the whole array space-separated
void printArray( uint16_t data[], uint16_t n ) {
for ( int i = 0; i < n; i++ ) {
Serial.print( data[ i ] );
if ( i != n - 1 ) Serial.print( " " );
}
}

/***********************************************
*
*                    SETUP
*
***********************************************/
void setup() {
Serial.begin( 115200 );
while( !Serial && millis() < 4000 ) {};
}

/***********************************************
*
*                     LOOP
*
***********************************************/
void loop() {

// add new value and find mode
uint16_t mValue = modeValue( adcData, DATA_LENGTH, rawValue );

Serial.print( rawValue );
Serial.print( "\t" );
// if using IDE monitor, print array to see the evolution of values
//  Serial.print( "\t" );
// print calculated mode value
Serial.println( mValue );

// use this delay to see the array in the IDE monitor
//delay( 100 );
}```
Try this on Teensy 4.0 with the Serial Plotter. It's spectacular. The mode works pretty well to calm down the ADC jitters. It's not super fast, but what I was looking for is a rock-steady value when reading a pot that is not moving. This seems to do the job real well, if you're reading something that's not changing too fast, like a pot. If you try the code, play around with the array size for more or less stability. There's quite a lot of noise at 12 bits!

The code is not optimized in any way.

12. Originally Posted by jonr
It makes sense when oversampling. For example, take 32 raw samples, take the median of these 32, treat that as a single derived sample. Take 1000 of the derived samples and find the peak-to-peak value. Repeat with mean instead of median.

This is real ADC data from a simple voltage divider producing 1.04020V on a T4.0. For consistency with T3 testing, I extend all data to 16 bits (so about 21,000 counts).

I've never seen median under-perform mean. So if speed or memory isn't an issue, I'd always use median.
Median is a non-linear operation and therefore cannt be used in system-analysis. But it is a excelent method to ignore large sudden signal deviations originating form an intefering process.

So for a DC measurement from multipe ADC samples, Median will potentially perform better.

13. This:

'Median and mode are better measures of central tendency in those cases---although neither increases the resolution of the data. Instead, they act to reduce the noise level in relation to the signal level.'

14. 'To get from 12 bits to 16, I multiply by 16 and add dither.'

15. Originally Posted by jonr
To get from 12 bits to 16, I multiply by 16 and add dither.

Do you have a suggestion for the best way to analyze median vs mean when used with over-sampling as I've described?

What results do you get (offset, peak-to-peak and std-dev) when using median vs mean with over-sampling in a T4.x?

I find that with random noise (generated in software), mean outperforms median. But apparently that is not what the teensy ADC produces.
Your oversampling by 32x will theoretically give you an extra 2.5 bits of resolution. I think the Teensy rand() function produces pretty good white noise for the dithering. I've used rand() with and added offset to test digital low-pass filters and the result looks pretty good. When I want to test analog filters, I generate the digital values at about 200KHz and send them out via a DAC on a T3.6.

1. What sampling speed and conversion speed settings are you using for the ADC?
2. What is your conversion rate after the oversampling, and how are you setting that rate?
3. Are you logging continuously to SD, or buffering and logging?
4. What are you using to generate the input signal? (If if is a potentiometer, what is its resistance and is it connected to V3.3 or to an external supply?)
5. Are you using any RC filtering (a capacitor between ADC pin and ground)?

Note that questions 1-3 would be unnecessary if you followed the rule at the top of the Forum page ;-)

[OLDFART MODE]
In my first job at an oceanographic instrument company my boss was a cranky old fart who taught me most of what I know about analog circuitry for sensors. (my degrees are in chemistry and oceanography). When I thought I had a good prototype running well, he would come into the lab, plug in an electric drill and pull the trigger about a foot from my circuit board. We'd watch the oscilloscope on the output and decide whether my filtering and shielding were adequate. It's a good test for oceanographic instruments as you have to test them on ships which may have a large electric winch motor nearby. OTOH, when the instrument is in an aluminum pressure case and 30m deep in saltwater, a lot of interferences goes away! ;-) As for the timing of this story, let's just say Reagan was in his second term as President at the time.
[/OLDFART MODE]

The point of this is that the switched capacitor voltage booster in your SD card can produce RF noise and the current spikes at that time can affect the V3.3 on the Teensy. Some sensors are particularly susceptible to this noise--among them peizoelectric shear sensors, which have output impedance in the 10s of megOhms and are connected to high-gain current-to-voltage amplifiers.

17. OK, I cleaned up my hacks to J Beale's code enough to post and ensure a better comparison.

A simple breadboard setup, T4.0, ~500 ohm and 1K fixed resistors and a couple of capacitors.

Hopefully it is clear how < 1 original count random dither allows median to produce a result closer to actual.

Code:
```// Analog input test for Teensy 3.0    Oct 4 2012 J.Beale
// Hacked on by JonR (probably the source of any problems)

#define VREF (3.2444)         // ADC reference voltage (= power supply)
#define VINPUT (1.04020)      // ADC input voltage from resistive divider to VREF
#define SAMPLES 10000         // how many samples to combine for pp, std.dev statistics
#define OS     32            // how many times to oversample
#define OFFSET  0 // -149.76  // calibrate out a fixed offset
#define num 1                 // ADC internal oversampling

void setup() {    // ==============================================================

Serial.begin(115200);      // baud rate is ignored with Teensy USB ACM i/o
delay(1000);
Serial.printf("# Teensy ADC test start: %dx%d oversampling \n", OS, num);
delay(1000);

} // ==== end setup() ===========

int sort_descD(const void *cmp1, const void *cmp2)
{
// Need to cast the void * to
double a = *((double *)cmp1);
double b = *((double *)cmp2);
// The comparison
return a > b ? -1 : (a < b ? 1 : 0);
}

void loop() {  // ================================================================

long datSum = 0;  // reset our accumulated sum of input values to zero
double sMax = 0;
double sMin = 65535;
long n;            // count of how many readings so far
double x, mean, delta, sumsq, m2, variance, stdev; // to calculate standard deviation

sumsq = 0; // initialize running squared sum of differences
mean = 0; // start off with running mean at zero
m2 = 0;

for (int i = 0; i < SAMPLES; i++) {

#if 1
// mean, else median
double sum = 0;

for (int i = 0; i < OS; ++i) {
sum += (analogRead(analogInPin) << 4)  + random(-800, 800) / 100.0;  // excessive, but who cares
} // for

x = (sum / OS);
#else
// median, usually helps.  Only hurts with random data (which teensy isn't)
double array[OS];

for (int i = 0; i < OS; ++i) {
} // for

qsort(array, OS, sizeof(array[0]), sort_descD);
x = array[OS / 2];
#endif

datSum += (x - OFFSET);
if (x > sMax) sMax = x;
if (x < sMin) sMin = x;

// from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
n++;
delta = x - mean;
mean += delta / n;
m2 += (delta * (x - mean));

} // for

variance = m2 / (n - 1); // (n-1):Sample Variance  (n): Population Variance
stdev = sqrt(variance);  // Calculate standard deviation
float datAvg = (1.0 * datSum) / n;
Serial.print(" Avg: ");    Serial.print(datAvg, 2);
//Serial.print(" Offset: ");  Serial.print(datAvg - EXPECTED, 2);
Serial.print(" P-P noise: ");  Serial.print(sMax - sMin);
Serial.print(" St.Dev: ");  Serial.println(stdev, 3);

delay(1000);

} // end main()  =====================================================```

18. Originally Posted by jonr
OK, I cleaned up my hacks to J Beale's code enough to post and ensure a better comparison.

A simple breadboard setup, T4.0, ~500 ohm and 1K fixed resistors and a couple of capacitors.

Hopefully it is clear how < 1 original count random dither allows median to produce a result closer to actual.

Code:
```// Analog input test for Teensy 3.0    Oct 4 2012 J.Beale
// Hacked on by JonR (probably the source of any problems)

#define VREF (3.2444)         // ADC reference voltage (= power supply)
#define VINPUT (1.04020)      // ADC input voltage from resistive divider to VREF
#define SAMPLES 10000         // how many samples to combine for pp, std.dev statistics
#define OS     32            // how many times to oversample
#define OFFSET  0 // -149.76  // calibrate out a fixed offset
#define num 1                 // ADC internal oversampling

void setup() {    // ==============================================================

Serial.begin(115200);      // baud rate is ignored with Teensy USB ACM i/o
delay(1000);
Serial.printf("# Teensy ADC test start: %dx%d oversampling \n", OS, num);
delay(1000);

} // ==== end setup() ===========

int sort_descD(const void *cmp1, const void *cmp2)
{
// Need to cast the void * to
double a = *((double *)cmp1);
double b = *((double *)cmp2);
// The comparison
return a > b ? -1 : (a < b ? 1 : 0);
}

void loop() {  // ================================================================

long datSum = 0;  // reset our accumulated sum of input values to zero
double sMax = 0;
double sMin = 65535;
long n;            // count of how many readings so far
double x, mean, delta, sumsq, m2, variance, stdev; // to calculate standard deviation

sumsq = 0; // initialize running squared sum of differences
mean = 0; // start off with running mean at zero
m2 = 0;

for (int i = 0; i < SAMPLES; i++) {

#if 1
// mean, else median
double sum = 0;

for (int i = 0; i < OS; ++i) {
sum += (analogRead(analogInPin) << 4)  + random(-800, 800) / 100.0;  // excessive, but who cares
} // for

x = (sum / OS);
#else
// median, usually helps.  Only hurts with random data (which teensy isn't)
double array[OS];

for (int i = 0; i < OS; ++i) {
} // for

qsort(array, OS, sizeof(array[0]), sort_descD);
x = array[OS / 2];
#endif

datSum += (x - OFFSET);
if (x > sMax) sMax = x;
if (x < sMin) sMin = x;

// from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
n++;
delta = x - mean;
mean += delta / n;
m2 += (delta * (x - mean));

} // for

variance = m2 / (n - 1); // (n-1):Sample Variance  (n): Population Variance
stdev = sqrt(variance);  // Calculate standard deviation
float datAvg = (1.0 * datSum) / n;
Serial.print(" Avg: ");    Serial.print(datAvg, 2);
//Serial.print(" Offset: ");  Serial.print(datAvg - EXPECTED, 2);
Serial.print(" P-P noise: ");  Serial.print(sMax - sMin);
Serial.print(" St.Dev: ");  Serial.println(stdev, 3);

delay(1000);

} // end main()  =====================================================```
I'll give that code a try. I do have a few problems with the code as is and I may modify it:
1. Since the code uses the standard Arduino analogRead() instead of the ADC object, I will have to do some research to see what ADC sampling and conversion speeds are being used.
3. The code is going to run faster on a T4X than a T3X since the T4X has hardware double floating point.
2. There is no fixed timing to the collection loop, It is basically:

A. Convert oversample times as fast as possible
B. calculate sample, (mean or median)
C. add sample data to statistics
D. loop back to A until we have enough samples

E. Show results wait 1 second, then go to A.

The problem is that we have no idea how long the loop takes, so we don't know the sampling rate. Even worse, the time to do a quick sort is variable, depending on the data sorted.

19. Valid points, although I've tried various things with timing with little to no effect on the numbers - median remains far better than mean. This suggests the noise isn't Gaussian - which doesn't surprise me at all.

20. How 'better' exactly?

PS, did you bandwidth limit the adc input?

21. You could put large capacitor from ADC input to gnd, e. g. 1000uF. That would provide a low impedance to the ADC input for audio frequencies.

22. A histogram of the raw ADC values, with that 1000uF in place would tell alot about the ADC performance.

23. I modified jonr's code and ran some tests. I made the following changes: (Code at bottom)

1. in the dithering, I switched random(-800, 800) to random(-800,801) as the second parameter to random is exclusive and with the first set you never get a value of 800.
2. I added an elapsedMicro timer to set the sample interval to 1mSec.
3. I reduced the number of samples in a set to 2000, so I get a new line of data once each 2 seconds.
4. I added a simple user interface to respond to commands:
…….. 'm' Switch between mean(32) and median(32) for calculating the oversampled value;
…….. 'd' to turn dithering on and off
…….. 'e' to switch from the original verbose output to tab-separated numbers that can be pasted into Excel

My hardware was a T4.1 with a NIMH cell with 1.1K series resistance and a 0.1uF capacitor from ADC pin to ground.

Some observations:
A. with dithering on, the median(32) has less noise than mean(32) (Std. dev of ~3 vs ~5 P-P ~27 vs ~60)
B. with dithering off, noise is much lower for median(32) (Std. dev of about 0.7)
C. with dithering off, the P-P values for median(32) bounce between 16 and 32, indicating that all the original values were within 1 to 2 bits of each other.
D. Using median(32) produces lower average values than mean(32). I did a T-test to see if a group of 23 2000 mean(32) sample averages were different from a group of 23 2000 median(32) sample averages. The probability that the groups had the same averages was 0.0003.

I suspect that the difference between the median(32) and mean(32) results has to do with the fact that the data has such a small range and is highly quantized with only two or three different ADC values before multiplying by 16 and adding the dither.

Tomorrow I'll consider changing the program to run the mean(32) and median(32) algorithms on the same ADC input data. The other test I want to try is to collect data from both methods while I feed in a slow triangle wave and save all 2000 samples.. That way I can see whether
the resolution is improved by the expected 2.5 bits.

Code:
```// Analog input test for Teensy 4.X   Oct 4 2012 J.Beale
// Hacked on by JonR (probably the source of any problems)
// Posted to PJRC form 8/20/20
// Modified by mborgerson 8/20/20

#define VREF (3.2444)         // ADC reference voltage (= power supply)
#define VINPUT (1.04020)      // ADC input voltage from resistive divider to VREF
#define SAMPLES 2000         // how many samples to combine for pp, std.dev statistics
#define OS     33            // how many times to oversample even values bias median low
#define OFFSET  0 // -149.76  // calibrate out a fixed offset
#define num 1                 // ADC internal oversampling

const int ledpin = 13;

#define  LEDON digitalWriteFast(ledpin, HIGH);
#define  LEDOFF digitalWriteFast(ledpin, LOW);

void setup() {    // ==============================================================
pinMode(analogInPin, INPUT_DISABLE); // because I don't know default at startup
pinMode(ledpin, OUTPUT);
analogReadRes(12);  // Teensy 3.Xand 4.X: set ADC resolution to this many bits
Serial.begin(115200);      // baud rate is ignored with Teensy USB ACM i/o
delay(1000);
Serial.printf("# Teensy ADC test start: %dx%d oversampling \n\n", OS, num);
delay(1000);
srand(micros()); // use unix time as random seed
Serial.println();

} // ==== end setup() ===========

int sort_descD(const void *cmp1, const void *cmp2)
{
// Need to cast the void * to
double a = *((double *)cmp1);
double b = *((double *)cmp2);
// The comparison
return a > b ? -1 : (a < b ? 1 : 0);
}

#define SAMPLERATE  1000
bool usemean = false;
bool usedither = true;
bool excelformat = false;
elapsedMicros sampletimer;
uint16_t ditherwidth = 800;  // actual dither value is ditherwidth/100.0
// Added fixed timing for oversampled  acquisition
void loop() {  // ================================================================

long datSum = 0;  // reset our accumulated sum of input values to zero
float ctimesum;
uint32_t startctime;
double sMax = 0;
double sMin = 65535;
long n;            // count of oversampled readings in sample
double x, mean, delta,  m2, variance, stdev; // to calculate standard deviation
// simple user interface

//  sumsq = 0; // initialize running squared sum of differences
mean = 0; // start off with running mean at zero
m2 = 0;
ctimesum = 0; // for accumulating sum of conversion times
sampletimer = 0;  // reset the sampling timer
for (int i = 0; i < SAMPLES; i++) {
// wait until time for next sample
while (sampletimer < (1000000 / SAMPLERATE));
sampletimer = 0;  // reset the sampling timer
startctime = micros();
if (usemean) {
// mean, else median
double sum = 0;
// read the oversampled data in a burst
// use led pin for scope marker to time conversion
LEDON
for (int i = 0; i < OS; ++i) {
//digitalToggle(ledpin); // for scoppe timing---needs TeensyDuino 1.53
if (usedither) { // NOTE random(min,max) can include min, but stops short of max
sum += (analogRead(analogInPin) << 4) + random(-ditherwidth, ditherwidth + 1) / 100.0;
} else {
}
} // for
LEDOFF
x = (sum / OS);
} else {
// median, usually helps.  Only hurts with random data (which teensy isn't)
double array[OS];
LEDON
for (int i = 0; i < OS; ++i) {
//digitalToggle(ledpin);
if (usedither) {  // NOTE random(min,max) can include min, but stops short of max
array[i] = (analogRead(analogInPin) << 4) + random(-ditherwidth, ditherwidth + 1) / 100.0;
} else {
}
} // for
LEDOFF
qsort(array, OS, sizeof(array[0]), sort_descD);
x = array[OS / 2];
} // end of if(usemean)
ctimesum += micros() - startctime;
datSum += (x - OFFSET);
if (x > sMax) sMax = x;
if (x < sMin) sMin = x;

// from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
n++;
delta = x - mean;
mean += delta / n;
m2 += (delta * (x - mean));

} // for

variance = m2 / (n - 1); // (n-1):Sample Variance  (n): Population Variance
stdev = sqrt(variance);  // Calculate standard deviation
float datAvg = (1.0 * datSum) / n;
if (excelformat) { // print tab separated for direct paste to Excel
Serial.print(datAvg, 2); Serial.print("\t");
//   Serial.print(datAvg - EXPECTED, 2); Serial.print("\t");
Serial.print(sMax - sMin); Serial.print("\t");
Serial.print(stdev, 3); Serial.print("\t");
if (usemean) Serial.print("  mean "); else Serial.print("  med ");
Serial.print("\t");
if (usedither) Serial.println("   d"); else Serial.println("  nd");
} else {  //verbose format
Serial.print(" Avg: ");    Serial.print(datAvg, 2);
//Serial.print(" Offset: ");  Serial.print(datAvg - EXPECTED, 2);
Serial.print(" P-P noise: ");  Serial.print(sMax - sMin);
Serial.print(" St.Dev: ");  Serial.print(stdev, 3);
if (usemean) Serial.print("  mean "); else Serial.print("  med ");
if (usedither) Serial.println("   d"); else Serial.println("  nd");
// Serial.printf(" Avg. Conversion time: %6.2f uSec\n\n", ctimesum / n);
}
CheckUserInput();

// no need to delay---10,000 samples takes long enough

} // end loop()  =====================================================

// Simple user interface to switch between mean and median of 32 samples
// and to turn dithering on and off
void CheckUserInput(void) {
char ch;
if (Serial.available()) {
if (ch == 'm') usemean = !usemean;
if (ch == 'd') usedither = !usedither;
if (ch == 'r') Showrandval();
if (ch == 'e') excelformat = !excelformat;
}
}

void  Showrandval(void) {
uint32_t i, n;
float rval;
float rmax;
n = 1000000;
rmax = 0;
for (i = 0; i < n; i++) {
rval = random(-800, 800);
if (rval > rmax) rmax = rval;
}
Serial.printf("The max of %lu calls to random(-800,800) is %5.2f\n", n, n, rmax);

rmax = 0;
for (i = 0; i < n; i++) {
rval = random(-800, 801);
if (rval > rmax) rmax = rval;
}
Serial.printf("The ma xof %lu calls to random(-800,801) is %5.2f\n", n, rmax);
delay(500);
}```

24. When I look at the data I collected and consider that the 12-bit samples are distributed between just two or three 12-bit values, it's no surprise that assumptions about a normal distribution are invalid. A histogram count for 2000 samples that looks like

count...value
2100....50
2101....1200
2102....750

doesn't look anything like a normal distribution. Even if you multiply by 16 and add white noise dither, it's still not normal!
With that distribution, it's also not surprising that the median (2101) is different from the mean (2101.35). Median and mode are the same, though.

Our problem with the Teensy ADC is that we just don't have enough noise! ;-)

25. Thanks for sharing, that is good work! And I agree, there isnot enough noise at the ADC input. Too bad there is no 16bit mode.

Do you have a teensy 3.2, 3.6 with which you could repeat this experiment? IIRC these do have a 16bit mode.

That 0.1uF puts the corner freq at 1.5kHz in combination with 1.1kOhm. I would expect the ADC to perform best at very low source impedance, e. g. smaller than 10 Ohm. Could repeat this test with a 1000uF or so.? Or a low noise buffer. A JFET, with grounded gate, and a 1k source to gnd resistor will work as a low noise voltage source.

#### Posting Permissions

• You may not post new threads
• You may not post replies
• You may not post attachments
• You may not edit your posts
•