Checking ADC noise

Status
Not open for further replies.

mborgerson

Well-known member
During the thread on megasample collection with the T4X https://forum.pjrc.com/threads/60299-1-MSPS-on-a-T4-Is-this-possible we discussed various ways to evaluate noise and the best estimate of a stable signal on recorded ADC data. There were proponents of Pk-Pk noise, mean and standard deviation and median. I decided to evaluate the possibilities and results with both simulated normally-distributed noise and with real-world ADC data. I found that in most cases the mean and median were the same as a measure of central tendency for almost all the case with 12-bit data.

To accomplish this testing, I wrote the attached application that I have been running on a T4.1. The app has the following features:

* You can vary the ADC sampling and conversion speed.
* You can switch between ADC data and simulated data. With simulated data, what used to control ADC sampling sets the standard deviation of the simulated data.
* As part of the results there is a display of histogram data from the data set.
* There is a very fast median calculator---about 10X the speed of calculating the median by selecting the middle entry in quicksorted data.
* There is a very remedial plotting function that you can use with the Arduino Serial Plotter to plot the histogram data.
* You can add fake noise spikes to the data to see how they affect the statistics.

Here is the code:

Code:
/******************************************************************
   Program to demonstrate some sampling statistics to evaluate noise
   in ADC collection.
   Included is a fast algorithm for calculation of median and histogram
   data for 12-bit integer ADC counts.

   You can also add fake noise spikes to the data to see the effect
   on the mean, median, std. deviation and histogram


   M. Borgerson  8/18/2020
 *********************************************************************/

#include <ADC.h>
const int ledpin    = 13;

#define SIMMEAN 2200 // asumed mean for simulated data
#define SPIKEAMPLITUDE 20  // spike amplitude in counts--spikes always positive
#define SPIKEINTERVAL 100  // number of samples between spikes


const char compileTime [] = "T4.1 Noise Measurement Test Compiled on " __DATE__ " " __TIME__;
//NOTE:  If you less than 65535 samples, you should change the bins array to uint16_t
//       to save some space.  Teensies have lots of memory, so 16KB for bins isn't an issue.
//       However, if you collect 16-bit data, 65536 bins might be a problem.

// if you want a LOT of samples on a T4.1 with PSRAM increase this to 4 million and put
// idata in EXTMEM
#define MAXVALS 40000
#define MAXBINS 4096


#define  LEDON digitalWriteFast(ledpin, HIGH);
#define  LEDOFF digitalWriteFast(ledpin, LOW);

uint16_t idata[MAXVALS];
uint32_t bins[MAXBINS];  // sufficient for up to LOTS of 12-bit data

// instantiate a new ADC object
ADC *adc = new ADC(); // adc object;
bool simdata = true;
bool noisespikes = false;
void setup() {
  pinMode(ledpin, OUTPUT);
  pinMode(A0, INPUT_DISABLE); // disable digital keeper resistors

  Serial.begin(9600);
  delay(500);  // wait for Serial to open
  Serial.println();
  Serial.println(compileTime);
  Serial.printf("CPU Frequency: %lu\n", F_CPU_ACTUAL);
  srand(micros());  // use micros() to set seed

}


void loop() {
  char ch;
  uint16_t cval;
  if (Serial.available()) {
    ch = Serial.read();
    switch (ch) {
      case 'b' :  // for checking serial plotter input
        LEDON
        delay(1000);
        LEDOFF
        break;
      case 's' :
        Serial.println("Simulated data will be used.");
        simdata = true;

        break;
      case 'a' :
        Serial.println("Analog data will be used.");
        simdata = false;
        break;
      case 'n' :
        noisespikes = !noisespikes;
        Serial.print("Noise spikes are " );
        if (noisespikes) {
          Serial.println(" ON");
        } else {
          Serial.println(" OFF");
        }

        break;

      case '1' :
      case '2' :
      case '3' :
      case '4' :
      case '5' :
        cval = ch & 0x07;
        GenData(idata, MAXVALS, cval); // cval controls std for simulation, or ADC speed
        HistoStats(idata, MAXVALS);
        break;
      case 'g'  :
        HistoGraph(bins);
        LEDON
        delay(100);
        HistoGraph(bins);
        LEDOFF
        break;
    } // end of switch(ch)
  } // end of if(Serial.available....
}  // end of loop()

// send bin data to Serial() so that Serial Minitor can plot it
// Doesn't plot any zero values, so low-count tails may be messed up
#define BINWIDTH 10
void HistoGraph(uint32_t gbins[]) {
  uint16_t i, j;
 // Serial.println("You have 10 seconds to switch to graph mode <Ctl-shift-L>)");
  delay(10000);
  for (i = 0; i < MAXBINS; i++) {
    if (gbins[i] > 0) {
      for (j = 0 ; j < BINWIDTH; j++) {
        Serial.println(gbins[i]);
        delay(10);
      }
    } // end of if(gbins[i] > 0)
  } // end of for(i=0...
}

// Do some stats on values in idat
void HistoStats(uint16_t idat[], uint32_t numvals) {
  uint32_t startmicro, dmicro;
  uint16_t  median, imin, imax;
  float mean, std;
  Serial.println("\nComputing statistics.");
  delay(5);

  MeanEtc(idat, numvals, &mean, &std, &imin, &imax);
  Serial.printf("\nmean : % 8.4f ", mean);
  Serial.printf("std. dev:  % 8.4f   ", std);
  Serial.printf("min = % u   max = % u\n \n", imin, imax);


  Serial.println();
  startmicro = micros();
  median = binMedian(idat, numvals);
  dmicro = micros() - startmicro;
  Serial.printf("%lu sample bin Median    = % u  ", numvals, median);
  Serial.printf("  took % 4lu microseconds\n", dmicro);

  startmicro = micros();
  qsort(idat, numvals, sizeof(uint16_t), cmpfunc);
  median = idat[numvals / 2];
  dmicro = micros() - startmicro;
  Serial.printf("%lu sample QSort Median  = % u  ", numvals, median);
  Serial.printf("  took % 4lu microseconds\n\n", dmicro);

  // Now show histogram from bins data
  ShowHisto(bins, imin, imax);
}


void GenData(uint16_t dat[], uint32_t nvals,  uint16_t istd) {
  if (simdata) {
    SimGenData(dat, nvals, SIMMEAN,  istd);
  } else {
    ADCGenData(dat,  nvals,  istd); // mean set by ADC input, istd controls speed setting
  }
}

// do the  easy stats
void MeanEtc(uint16_t dat[], uint32_t nvals, float *mnptr, float *stptr, uint16_t *minp, uint16_t *maxp) {
  uint32_t i;
  uint16_t imin, imax, ival;
  float  var, fv, s1, s2, mean;
  double  sum, sumsq; // with 4 million samples, even 32-bit floats have rounding problems!
  // NOTE SUMSQ gets LARGE: suppose the values are ~4000 and you sum 100,000 of them then the sumsq
  //       is about (4x10^3) * (4x10^3)  * (1x10^5)   or 1.6x10^12 which is way too big for a uint32_t
  sum = 0.0;
  sumsq = 0.0;
  imin = 65535;
  imax = 0;
  // first get sum and calculate mean
  for (i = 0; i < nvals; i++) {
    ival = dat[i];
    if (ival < imin) imin = ival;
    if (ival > imax) imax = ival;
    sum += (float)ival;
    sumsq += (float)ival * (float)ival;
  }
  mean = sum / nvals;
  // now rcalculate variance in tw-pass algorithm to minimize roundoff error
  s1 = 0; s2 = 0;
  for (i = 0; i < nvals; i++) {
    fv = (float)dat[i] - mean;
    s1 += (fv * fv );
    s2 += fv;
  }
  var = (s1 - s2 * s2 / nvals) / (nvals - 1);
  *mnptr = mean;
  //Serial.printf("Variance: %8.3ef  s1: %8.3e  s2^2/n: %8.3e\n", var, s1, s2*s2/nvals);
  *stptr = sqrt(var);
  *minp = imin;
  *maxp = imax;
}


// Since this is aimed at evaluating noise of 12-bit ADC data,
// the generated numbers are constrained to values from 0 to 4095
// The numbers are also constrained to mean +/- 4 *std dev
void SimGenData(uint16_t dat[], uint32_t nvals,  uint16_t imean, uint16_t istd) {
  float  x, fmean, fstd;
  uint16_t ival;
  uint32_t i;
  bool badresult;
  fmean = imean;
  fstd = istd;

  if (nvals > MAXVALS) nvals = MAXVALS; // stay within targt array
  Serial.println("\n\nGenerating Random Normally distributed data.");
  for (i = 0; i < nvals; i++) {
    LEDON
    do {
      badresult = false;
      x = fastrnorm(fmean, fstd);
      ival = floor(x);
      // if noisespikes are on add a spike every 100 samples
      if (ival > MAXBINS - 1) badresult = true;
      if (ival > (fmean + 6 * fstd)) badresult = true;
      if (ival < (fmean - 6 * fstd)) badresult = true;

    } while (badresult);  // fall through when result in bounds
    if (noisespikes) {
      if ((i % SPIKEINTERVAL) == (SPIKEINTERVAL - 1)) {
        ival += SPIKEAMPLITUDE;
      }
    }
    if (ival > MAXBINS - 1) ival = MAXBINS - 1; // even spikes can get clipped!
    LEDOFF

    dat[i] = ival;
  }  // end of for(i=0...

}



// Collect ADC Data from A0.  dmean is ignored.  dstd
// controls ADC speed settings.  No fancy timing control--
// just collect analog data as fast as analogRead will
// provide it.
void ADCGenData(uint16_t dat[], uint32_t nvals, uint16_t spd) {
  uint32_t i, startmicro, dmicro;
  uint16_t ival;
  startmicro = micros();
  if (nvals > MAXVALS) nvals = MAXVALS;
  Serial.println("\n\nReading ADC data.");
  SetADCSpeed(spd);
  LEDON
  for (i = 0; i < nvals; i++) {
    // straightforward blocking analog read.
    ival = adc->adc0->analogRead(A0);
    // if noisespikes are on add a spike every 100 samples
    if (noisespikes) {
      if ((i % SPIKEINTERVAL) == (SPIKEINTERVAL - 1)) {
        ival += SPIKEAMPLITUDE;
      }
    }
    if (ival > 4095) ival = 4095;
    dat[i] = ival;
  }  // end of for(i=0...

  LEDOFF
  dmicro = micros() - startmicro;
  Serial.printf("Collecting % lu  samples took % 4lu microseconds\n", nvals, dmicro);
}

// Set ADC Speed  from low to very high
void SetADCSpeed(uint16_t spd) {

  switch (spd) {
    case 1:
      Serial.println("ADC Speed set to 12 - bit LOW_SPEED.");
      adc->adc0->setResolution(12); // set bits of resolution
      adc->adc0->setConversionSpeed(ADC_CONVERSION_SPEED::LOW_SPEED);
      adc->adc0->setSamplingSpeed(ADC_SAMPLING_SPEED::LOW_SPEED);
      break;
    case 2:
      Serial.println("ADC Speed set to 12 - bit MED_SPEED.");
      adc->adc0->setResolution(12); // set bits of resolution
      adc->adc0->setConversionSpeed(ADC_CONVERSION_SPEED::MED_SPEED);
      adc->adc0->setSamplingSpeed(ADC_SAMPLING_SPEED::MED_SPEED);
      break;
    case 3:
      Serial.println("ADC Speed set to 12 - bit HIGH_SPEED.");
      adc->adc0->setResolution(12); // set bits of resolution
      adc->adc0->setConversionSpeed(ADC_CONVERSION_SPEED::HIGH_SPEED);
      adc->adc0->setSamplingSpeed(ADC_SAMPLING_SPEED::HIGH_SPEED);
      break;
    case 4:
      Serial.println("ADC Speed set to 12 - bit VERY_HIGH range.");
      adc->adc0->setResolution(12); // set bits of resolution
      adc->adc0->setConversionSpeed(ADC_CONVERSION_SPEED::VERY_HIGH_SPEED);
      adc->adc0->setSamplingSpeed(ADC_SAMPLING_SPEED::VERY_HIGH_SPEED);
      break;
    case 5:
      Serial.println("ADC Speed set to 8 - bit HIGH_SPEED.");
      adc->adc0->setResolution(8); // set bits of resolution
      adc->adc0->setConversionSpeed(ADC_CONVERSION_SPEED::HIGH_SPEED);
      adc->adc0->setSamplingSpeed(ADC_SAMPLING_SPEED::HIGH_SPEED);
      break;
  }
}

// finds median without recursion, comparison or sorting
// takes about 140mSec to find median of 100,000 records
uint16_t binMedian(uint16_t dat[], uint32_t numsamp) {
  uint32_t i, bsum;
  uint16_t val;


  // empty the bins--they're on the stack and have random values
  memset(bins, 0, sizeof(bins));
  //  Serial.printf("In binMedian, address of dat is % p\n", &dat);
  //   delay(5);
  for (i = 0; i < numsamp; i++) {
    val = dat[i];

    if (val < MAXBINS) { // prevent writing past end of bins array
      bins[val]++;
    }

  } // bins are filled
  i = 0; bsum = 0;
  do {
    bsum += bins[i];
    i++;
  }  while (bsum < numsamp / 2);

  return i - 1;
}


void ShowHisto(uint32_t bins[], uint16_t imin, uint16_t imax) {
  uint16_t i, lnum;
  lnum = 0;
  Serial.print("Histogram Data");
  for (i = imin - 1; i < imax + 2; i++) {
    if ((lnum % 10) == 0) Serial.printf("\n % 4d: ", i);
    lnum++;
    Serial.printf(" % 5lu ", bins[i]);
  }
  Serial.println();
}


// These are the data collection and simulation routines
inline int cmpfunc (const void * a, const void * b) {
  uint16_t ia = *(uint16_t *)a;
  uint16_t ib = *(uint16_t *)b;
  return ( ia - ib);
}


// returns a random floating point value between 0.0 and 1.0
// we use the built-in rand() function for speed as we are
// not particularly picky about the characteristics of the number
float randf(void) {
  float val;
  val = (float)rand() / ((float)(RAND_MAX) - 0.5);
  return val;
}
// convert floating rnd to normal curve
float fastrnorm(float mean, float stdDev)
{
  static float spare;
  static float u1;
  static float u2;
  static float s;
  static bool isSpareReady = false;

  if (isSpareReady)
  {
    isSpareReady = false;
    return ((spare * stdDev) + mean);
  } else {
    do {
      u1 = (randf() * 2) - 1;
      u2 = (randf() * 2) - 1;
      s = (u1 * u1) + (u2 * u2);
    } while (s >= 1.0);
    s = sqrtf(-2.0 * logf(s) / s);
    spare = u2 * s;
    isSpareReady = true;
    return (mean + (stdDev * u1 * s));
  }
}
 
In my T4.0 setup, median results in a peak-to-peak of 9 counts and mean results in 38 counts. So in some cases, it makes a huge difference. Noise isn't always Gaussian - try both mean and median for any specific situation.
 
I'm not sure what your numbers mean. Mean and median are single numbers and different from peak-to-peak. Is 9 the peak-to-peak value or the median? Did you calculate the mean and median from the same data set? If so, the peak-to-peak number for both mean and median calculations should be the same--the difference between the largest and smallest numbers in the data set.

How did you get the values for the data set? If it was from an ADC pin, what was the signal input and what was the value as measured with a voltmeter?

If you are measuring a signal very close to zero, I would expect the distribution to be non-Gaussian, as all numbers below zero would come up zero----the curve would be cut off at zero on the left and might extend 20 or thirty counts on the right.
 
It makes sense when oversampling. For example, take 32 raw samples, take the median of these 32, treat that as a single derived sample. Take 1000 of the derived samples and find the peak-to-peak value. Repeat with mean instead of median.

This is real ADC data from a simple voltage divider producing 1.04020V on a T4.0. For consistency with T3 testing, I extend all data to 16 bits (so about 21,000 counts).

I've never seen median under-perform mean. So if speed or memory isn't an issue, I'd always use median.
 
A few more questions:

Were your numbers for P-P on mean and median taken after you multiplied by 16 to make the data 16 bits? If so, your P-P figures of 9 and 38 counts should be divided by 16 to get the P-P numbers from the 12-bit ADC.

Do you do your mean and median calculations in integer arithmetic or floating point. Lots of people use floating point and oversampling to try to get better resolution sometimes with deceptive results

For large samples (N > 1000), mean and median seem to give equivalent results in LSBs at 12-bit resolution. For small samples (N = 32), it doesn't surprise me that the mean and median would be different---particularly if you are multiplying by 16 before doing the calculations.

If you are working with 16-bit numbers--with a sample size o 32--the bin median algorithm takes a lot more memory for the bins. With that small a sample the speed difference between O(N) for bin median and O(NlogN) for standard median algorithms should be small.

It would be easier for me to understand your numbers if you would post code---or at least some pseudopod--to show your algorithm.

NOTE: if you're taking 32 samples at 10 microseconds each in a burst, then waiting 5 milliseconds before the collecting the next group, you have to be very careful to have low output impedance on your sensor if there is significant capacitance at the ADC input. In situations like that, the first sample of the 32 can be higher by 10 to 20 bits. A similar problem can occur if you have an intermittent noise spike that gets captured in one of your 32 samples. In a case like that the median will generally be a better estimate than the mean, as it will pretty much ignore the noise spike. If the mean of the sample is 1000 counts, the standard deviation about 3 counts, and the noise spike 64 counts, it will increase the mean of 32 samples by 2 counts but have less than one bit influence on the median.

This paragraph from measuring.com seems to support your use of the median for sample size 32: https://measuringu.com/small-n/

Average Time: One long task time can skew the arithmetic mean and make it a poor measure of the middle. In such situations, the median is a better indicator of the typical or “average” time. Unfortunately, the median tends to be less accurate and more biased than the mean when sample sizes are less than about 25. In these circumstances, the geometric mean (average of the log values transformed back) tends to be a better measure of the middle. When sample sizes get above 25, the median works fine.

The "One long task time" seems equivalent to one noise spike in a sample.
 
Thanks mborgerson for the median algorithm. I will sure have a use for that once I understand how it works. I was just browsing Wikipedia and saw something about the "mode" of a data set. This is the value that appears most often in the set. Gives more reliable values when the data is skewed.

https://en.wikipedia.org/wiki/Mode_(statistics)

I was thinking that this could be useful for ADC values. The computations are a little more involved and may be too slow (on a Teensy? Are you kidding?).
 
It would be very simple and fast to find the mode once you have sorted the data into the bins with binMedians()
Code:
modeval = 0;
maxbinval = 0;
for(i=0; i<numbins; i++){  // find index of bin with highest number of counts. Save that bin number as modeval
   if(bins[i] > maxbinval){
       modeval = i;
       maxbinval = binvals[i];
    }
}

For pretty quiet input values, there is some probability that the two highest bin counts would be the same. Modeval will return the first bin index with that number of counts.
 
I get much better results with median, even with 3 sample oversampling. It is a burst of samples, but I've checked - there is no "first sample is different" bias.

I should do a FFT on the data to get more insight about the non-Gaussian noise. But it might be too high frequency.

I don't advise doing analysis of over sampled data near the limits of resolution - use floating point. Or extend the data to something more than your resolution of interest.
 
Or extend the data to something more than your resolution of interest.

How do you extend the resolution of 12-bit data?

If you multiply the data by 16, to get 16-bit data, you have a wider range between the min and max, but you've also multiplied the quantization error by 16. The system still puts out only 4096 discrete values.

If you take the strict definition of the median you could get 2000.5 as a median instead of 2000 for an even number of samples. However, that really doesn't add a lot of resolution.

If you take the mean of many samples you can get better resolution. However extending the resolution from 12 to 16 bits means oversampling by a factor of 256. However for that to work, you need have white noise at the appropriate level at the inputs.

If you have Gaussian noise at the input, averaging might not help much, since for Gaussian noise over something like 32 samples, mean = median = mode to within 1 LSB.

When you find large differences between mean, median, and mode in a small sample set, I think it means that you have either a smooth, but lopsided distribution curve or you have one or more spikes in the signal which are predominantly either positive opt negative. Median and mode are better measures of central tendency in those cases---although neither increases the resolution of the data. Instead, they act to reduce the noise level in relation to the signal level.

One of the best arguments for oversampling used to be that it improved the signal-to-noise ratio without increasing the storage requirements. However with the speed memory and storage capability of something like the T4.1, if you need a good signal at 10KHz, you could just sample at 40KHz and store everything (80KBytes/second means you can have two buffers with 100 seconds of data each in the PSRAM of a T4.1).
Then you write out a buffer's worth to SD every 100 seconds. When your sampling run is done, you import the data into Matlab and use the decimate function to digitally filter the data and reduce it to 10KSamples/second. You can also do a spectral analysis of the data if you have sampled at reasonably precise 25-microsecond intervals. That's well within the capabilities of the T4.x ADC when triggered by an interval timer.

The noise source that concerns me most is that caused by the large current spikes drawn by SD cards when they erase blocks. These intermittent 150mA spikes can pull down the T4.1 3.3V supply by as much as several 10s of millivolts. If you have a stable input signal which is not radiometric to that 3.3V supply, you will see a positive spike in the T4.x ADC data as it's reference voltage is reduced. You can reduce the impact of these current spikes by using an external ADC with a good reference or by switching to a T3.X and setting up the ADC to use the internal 1.2V reference.
 
To get from 12 bits to 16, I multiply by 16 and add dither.

Do you have a suggestion for the best way to analyze median vs mean when used with over-sampling as I've described?

What results do you get (offset, peak-to-peak and std-dev) when using median vs mean with over-sampling in a T4.x?

I find that with random noise (generated in software), mean outperforms median. But apparently that is not what the teensy ADC produces.
 
Might be off-topic but... something to see!

I've been working most of the day on a program to demonstrate filtering ADC values using the statistical mode. This is what I came up with :
Code:
/*
 * Filtering noise from ADC by finding statistical mode of 
 * mutiple readings
 * 
 * It prints the raw values from ADC0 then the mode, the value that appears
 * most often.
 * 
 * Open the IDE Serial Plotter for an interesting display!
 */

#include <ADC.h>

// create ADC object
ADC *adc = new ADC();

// array that contains several ADC readings
const uint16_t DATA_LENGTH = 20;
uint16_t adcData[ DATA_LENGTH ];

// fill the array with zeros (makes sort unnecessary)
void fillArray( uint16_t data[], uint16_t n ) {
  for ( int i = 0; i < n; i++ )
    data[ i ] = 0;
}

// insert a new value while keeping the array sorted
// discard the highest or lowest item
void insertSorted( uint16_t data[], uint16_t n, uint16_t newValue ) {
  // start searching from the bottom of the array
  for ( int i = 0; i < n; i++ ) {
    if ( newValue <= data[ i ] ) {
      // move up all values above this one
      // we must start from the top of the array
      // (the highest value is discarded)
      for ( int j = n - 1; j > i; j-- ) {
        data[ j ] = data[ j - 1 ];      
      }
      // insert new value
      data[ i ] = newValue;
      return;
    }
  } 
  // if we get here it means that newValue is greater than all 
  // the values in the array so...
  // move down all values
  // we must start from the bottom of the array
  // (the lowest value is discarded)
  for ( int i = 0; i < n; i++ ) {
    data[ i ] = data[ i + 1 ];      
  }
  // insert new value at the end
  data[ n - 1 ] = newValue;
}

// finds the mode, the value that appears most often in the array
// note : the array must be sorted
// mode algorithm adapted from PaulMurrayCbr in Arduino forum 
uint16_t modeValue( uint16_t data[], uint16_t n, uint16_t newValue ) {
  insertSorted( data, n, newValue );

  // look for mode value
  int highCount = 0;
  int bestValue = -1;
  int count = 0;
  int value = -1;
  for ( int i = 0; i < n; i++ ) {
    if ( data[ i ] != value ) {
      value = data[ i ];
      count = 0;
    }
    count++;
    if ( count > highCount ) {
      highCount = count;
      bestValue = value;
    }
  }
  // if no mode is found
  if ( highCount <= 1 ) {
    // return median instead
    return data[ n / 2 ];
  } else {
    return bestValue;
  }
}

// print the whole array space-separated
void printArray( uint16_t data[], uint16_t n ) {
  for ( int i = 0; i < n; i++ ) {
    Serial.print( data[ i ] );
    if ( i != n - 1 ) Serial.print( " " );
  }
}

/***********************************************
 * 
 *                    SETUP
 *          
 ***********************************************/
void setup() {
  adc->adc0->setResolution( 12 );
  Serial.begin( 115200 );
  while( !Serial && millis() < 4000 ) {};
}


/***********************************************
 * 
 *                     LOOP
 *          
 ***********************************************/
void loop() {
  uint16_t rawValue = adc->analogRead( A0 );

  // add new value and find mode
  uint16_t mValue = modeValue( adcData, DATA_LENGTH, rawValue );

  Serial.print( rawValue );
  Serial.print( "\t" );
  // if using IDE monitor, print array to see the evolution of values
//  printArray( adcData, DATA_LENGTH );
//  Serial.print( "\t" );
  // print calculated mode value
  Serial.println( mValue );

  // use this delay to see the array in the IDE monitor
  //delay( 100 );
}

Try this on Teensy 4.0 with the Serial Plotter. It's spectacular. The mode works pretty well to calm down the ADC jitters. It's not super fast, but what I was looking for is a rock-steady value when reading a pot that is not moving. This seems to do the job real well, if you're reading something that's not changing too fast, like a pot. If you try the code, play around with the array size for more or less stability. There's quite a lot of noise at 12 bits!

The code is not optimized in any way.
 
It makes sense when oversampling. For example, take 32 raw samples, take the median of these 32, treat that as a single derived sample. Take 1000 of the derived samples and find the peak-to-peak value. Repeat with mean instead of median.

This is real ADC data from a simple voltage divider producing 1.04020V on a T4.0. For consistency with T3 testing, I extend all data to 16 bits (so about 21,000 counts).

I've never seen median under-perform mean. So if speed or memory isn't an issue, I'd always use median.

Median is a non-linear operation and therefore cannt be used in system-analysis. But it is a excelent method to ignore large sudden signal deviations originating form an intefering process.

So for a DC measurement from multipe ADC samples, Median will potentially perform better.
 
This:

'Median and mode are better measures of central tendency in those cases---although neither increases the resolution of the data. Instead, they act to reduce the noise level in relation to the signal level.'
 
'To get from 12 bits to 16, I multiply by 16 and add dither.'


Adding noise after an ADC does no good, but increasing noise.
 
To get from 12 bits to 16, I multiply by 16 and add dither.

Do you have a suggestion for the best way to analyze median vs mean when used with over-sampling as I've described?

What results do you get (offset, peak-to-peak and std-dev) when using median vs mean with over-sampling in a T4.x?

I find that with random noise (generated in software), mean outperforms median. But apparently that is not what the teensy ADC produces.
Your oversampling by 32x will theoretically give you an extra 2.5 bits of resolution. I think the Teensy rand() function produces pretty good white noise for the dithering. I've used rand() with and added offset to test digital low-pass filters and the result looks pretty good. When I want to test analog filters, I generate the digital values at about 200KHz and send them out via a DAC on a T3.6.

A few questions about your sampling scheme:
1. What sampling speed and conversion speed settings are you using for the ADC?
2. What is your conversion rate after the oversampling, and how are you setting that rate?
3. Are you logging continuously to SD, or buffering and logging?
4. What are you using to generate the input signal? (If if is a potentiometer, what is its resistance and is it connected to V3.3 or to an external supply?)
5. Are you using any RC filtering (a capacitor between ADC pin and ground)?

Note that questions 1-3 would be unnecessary if you followed the rule at the top of the Forum page ;-)
 
The reason I asked about what is connected to the ADC input:

[OLDFART MODE]
In my first job at an oceanographic instrument company my boss was a cranky old fart who taught me most of what I know about analog circuitry for sensors. (my degrees are in chemistry and oceanography). When I thought I had a good prototype running well, he would come into the lab, plug in an electric drill and pull the trigger about a foot from my circuit board. We'd watch the oscilloscope on the output and decide whether my filtering and shielding were adequate. It's a good test for oceanographic instruments as you have to test them on ships which may have a large electric winch motor nearby. OTOH, when the instrument is in an aluminum pressure case and 30m deep in saltwater, a lot of interferences goes away! ;-) As for the timing of this story, let's just say Reagan was in his second term as President at the time.
[/OLDFART MODE]

The point of this is that the switched capacitor voltage booster in your SD card can produce RF noise and the current spikes at that time can affect the V3.3 on the Teensy. Some sensors are particularly susceptible to this noise--among them peizoelectric shear sensors, which have output impedance in the 10s of megOhms and are connected to high-gain current-to-voltage amplifiers.
 
OK, I cleaned up my hacks to J Beale's code enough to post and ensure a better comparison.

A simple breadboard setup, T4.0, ~500 ohm and 1K fixed resistors and a couple of capacitors.

Hopefully it is clear how < 1 original count random dither allows median to produce a result closer to actual.

Code:
// Analog input test for Teensy 3.0    Oct 4 2012 J.Beale
// Hacked on by JonR (probably the source of any problems)
// Setup: https://picasaweb.google.com/109928236040342205185/Electronics#5795546092126071650


#define VREF (3.2444)         // ADC reference voltage (= power supply)
#define VINPUT (1.04020)      // ADC input voltage from resistive divider to VREF
#define ADCMAX (65536)        // maximum possible reading from ADC
#define EXPECTED (ADCMAX*(VINPUT/VREF))    // expected ADC reading
#define SAMPLES 10000         // how many samples to combine for pp, std.dev statistics
#define OS     32            // how many times to oversample
#define OFFSET  0 // -149.76  // calibrate out a fixed offset
#define num 1                 // ADC internal oversampling

const int analogInPin = A8;   // Analog input pin

void setup() {    // ==============================================================

  analogReadAveraging(num);
  analogReadRes(12);          // Teensy 3.0: set ADC resolution to this many bits

  Serial.begin(115200);      // baud rate is ignored with Teensy USB ACM i/o
  delay(1000);
  Serial.printf("# Teensy ADC test start: %dx%d oversampling \n", OS, num);
  delay(1000);

} // ==== end setup() ===========

int sort_descD(const void *cmp1, const void *cmp2)
{
  // Need to cast the void * to
  double a = *((double *)cmp1);
  double b = *((double *)cmp2);
  // The comparison
  return a > b ? -1 : (a < b ? 1 : 0);
}


void loop() {  // ================================================================

  long datSum = 0;  // reset our accumulated sum of input values to zero
  double sMax = 0;
  double sMin = 65535;
  long n;            // count of how many readings so far
  double x, mean, delta, sumsq, m2, variance, stdev; // to calculate standard deviation

  sumsq = 0; // initialize running squared sum of differences
  n = 0;    // have not made any ADC readings yet
  mean = 0; // start off with running mean at zero
  m2 = 0;

  for (int i = 0; i < SAMPLES; i++) {

#if 1
    // mean, else median
    double sum = 0;

    for (int i = 0; i < OS; ++i) {
      sum += (analogRead(analogInPin) << 4)  + random(-800, 800) / 100.0;  // excessive, but who cares
    } // for

    x = (sum / OS);
#else
    // median, usually helps.  Only hurts with random data (which teensy isn't)
    double array[OS];

    for (int i = 0; i < OS; ++i) {
      array[i] = (analogRead(analogInPin) << 4) + random(-800, 800) / 100.0;
    } // for

    qsort(array, OS, sizeof(array[0]), sort_descD);
    x = array[OS / 2];
#endif

    datSum += (x - OFFSET);
    if (x > sMax) sMax = x;
    if (x < sMin) sMin = x;

    // from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    n++;
    delta = x - mean;
    mean += delta / n;
    m2 += (delta * (x - mean));

  } // for

  variance = m2 / (n - 1); // (n-1):Sample Variance  (n): Population Variance
  stdev = sqrt(variance);  // Calculate standard deviation
  float datAvg = (1.0 * datSum) / n;
  Serial.print(" Avg: ");    Serial.print(datAvg, 2);
  //Serial.print(" Offset: ");  Serial.print(datAvg - EXPECTED, 2);
  Serial.print(" P-P noise: ");  Serial.print(sMax - sMin);
  Serial.print(" St.Dev: ");  Serial.println(stdev, 3);

  delay(1000);

} // end main()  =====================================================
 
OK, I cleaned up my hacks to J Beale's code enough to post and ensure a better comparison.

A simple breadboard setup, T4.0, ~500 ohm and 1K fixed resistors and a couple of capacitors.

Hopefully it is clear how < 1 original count random dither allows median to produce a result closer to actual.

Code:
// Analog input test for Teensy 3.0    Oct 4 2012 J.Beale
// Hacked on by JonR (probably the source of any problems)
// Setup: https://picasaweb.google.com/109928236040342205185/Electronics#5795546092126071650


#define VREF (3.2444)         // ADC reference voltage (= power supply)
#define VINPUT (1.04020)      // ADC input voltage from resistive divider to VREF
#define ADCMAX (65536)        // maximum possible reading from ADC
#define EXPECTED (ADCMAX*(VINPUT/VREF))    // expected ADC reading
#define SAMPLES 10000         // how many samples to combine for pp, std.dev statistics
#define OS     32            // how many times to oversample
#define OFFSET  0 // -149.76  // calibrate out a fixed offset
#define num 1                 // ADC internal oversampling

const int analogInPin = A8;   // Analog input pin

void setup() {    // ==============================================================

  analogReadAveraging(num);
  analogReadRes(12);          // Teensy 3.0: set ADC resolution to this many bits

  Serial.begin(115200);      // baud rate is ignored with Teensy USB ACM i/o
  delay(1000);
  Serial.printf("# Teensy ADC test start: %dx%d oversampling \n", OS, num);
  delay(1000);

} // ==== end setup() ===========

int sort_descD(const void *cmp1, const void *cmp2)
{
  // Need to cast the void * to
  double a = *((double *)cmp1);
  double b = *((double *)cmp2);
  // The comparison
  return a > b ? -1 : (a < b ? 1 : 0);
}


void loop() {  // ================================================================

  long datSum = 0;  // reset our accumulated sum of input values to zero
  double sMax = 0;
  double sMin = 65535;
  long n;            // count of how many readings so far
  double x, mean, delta, sumsq, m2, variance, stdev; // to calculate standard deviation

  sumsq = 0; // initialize running squared sum of differences
  n = 0;    // have not made any ADC readings yet
  mean = 0; // start off with running mean at zero
  m2 = 0;

  for (int i = 0; i < SAMPLES; i++) {

#if 1
    // mean, else median
    double sum = 0;

    for (int i = 0; i < OS; ++i) {
      sum += (analogRead(analogInPin) << 4)  + random(-800, 800) / 100.0;  // excessive, but who cares
    } // for

    x = (sum / OS);
#else
    // median, usually helps.  Only hurts with random data (which teensy isn't)
    double array[OS];

    for (int i = 0; i < OS; ++i) {
      array[i] = (analogRead(analogInPin) << 4) + random(-800, 800) / 100.0;
    } // for

    qsort(array, OS, sizeof(array[0]), sort_descD);
    x = array[OS / 2];
#endif

    datSum += (x - OFFSET);
    if (x > sMax) sMax = x;
    if (x < sMin) sMin = x;

    // from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    n++;
    delta = x - mean;
    mean += delta / n;
    m2 += (delta * (x - mean));

  } // for

  variance = m2 / (n - 1); // (n-1):Sample Variance  (n): Population Variance
  stdev = sqrt(variance);  // Calculate standard deviation
  float datAvg = (1.0 * datSum) / n;
  Serial.print(" Avg: ");    Serial.print(datAvg, 2);
  //Serial.print(" Offset: ");  Serial.print(datAvg - EXPECTED, 2);
  Serial.print(" P-P noise: ");  Serial.print(sMax - sMin);
  Serial.print(" St.Dev: ");  Serial.println(stdev, 3);

  delay(1000);

} // end main()  =====================================================

I'll give that code a try. I do have a few problems with the code as is and I may modify it:
1. Since the code uses the standard Arduino analogRead() instead of the ADC object, I will have to do some research to see what ADC sampling and conversion speeds are being used.
3. The code is going to run faster on a T4X than a T3X since the T4X has hardware double floating point.
2. There is no fixed timing to the collection loop, It is basically:

A. Convert oversample times as fast as possible
B. calculate sample, (mean or median)
C. add sample data to statistics
D. loop back to A until we have enough samples

E. Show results wait 1 second, then go to A.

The problem is that we have no idea how long the loop takes, so we don't know the sampling rate. Even worse, the time to do a quick sort is variable, depending on the data sorted.
 
Valid points, although I've tried various things with timing with little to no effect on the numbers - median remains far better than mean. This suggests the noise isn't Gaussian - which doesn't surprise me at all.
 
You could put large capacitor from ADC input to gnd, e. g. 1000uF. That would provide a low impedance to the ADC input for audio frequencies.
 
I modified jonr's code and ran some tests. I made the following changes: (Code at bottom)

1. in the dithering, I switched random(-800, 800) to random(-800,801) as the second parameter to random is exclusive and with the first set you never get a value of 800.
2. I added an elapsedMicro timer to set the sample interval to 1mSec.
3. I reduced the number of samples in a set to 2000, so I get a new line of data once each 2 seconds.
4. I added a simple user interface to respond to commands:
…….. 'm' Switch between mean(32) and median(32) for calculating the oversampled value;
…….. 'd' to turn dithering on and off
…….. 'e' to switch from the original verbose output to tab-separated numbers that can be pasted into Excel

My hardware was a T4.1 with a NIMH cell with 1.1K series resistance and a 0.1uF capacitor from ADC pin to ground.

Some observations:
A. with dithering on, the median(32) has less noise than mean(32) (Std. dev of ~3 vs ~5 P-P ~27 vs ~60)
B. with dithering off, noise is much lower for median(32) (Std. dev of about 0.7)
C. with dithering off, the P-P values for median(32) bounce between 16 and 32, indicating that all the original values were within 1 to 2 bits of each other.
D. Using median(32) produces lower average values than mean(32). I did a T-test to see if a group of 23 2000 mean(32) sample averages were different from a group of 23 2000 median(32) sample averages. The probability that the groups had the same averages was 0.0003.

I suspect that the difference between the median(32) and mean(32) results has to do with the fact that the data has such a small range and is highly quantized with only two or three different ADC values before multiplying by 16 and adding the dither.

Tomorrow I'll consider changing the program to run the mean(32) and median(32) algorithms on the same ADC input data. The other test I want to try is to collect data from both methods while I feed in a slow triangle wave and save all 2000 samples.. That way I can see whether
the resolution is improved by the expected 2.5 bits.

Code:
// Analog input test for Teensy 4.X   Oct 4 2012 J.Beale
// Hacked on by JonR (probably the source of any problems)
// Setup: https://picasaweb.google.com/109928236040342205185/Electronics#5795546092126071650
// Posted to PJRC form 8/20/20
// Modified by mborgerson 8/20/20

#define VREF (3.2444)         // ADC reference voltage (= power supply)
#define VINPUT (1.04020)      // ADC input voltage from resistive divider to VREF
#define ADCMAX (65536)        // maximum possible reading from ADC
#define EXPECTED (ADCMAX*(VINPUT/VREF))    // expected ADC reading
#define SAMPLES 2000         // how many samples to combine for pp, std.dev statistics
#define OS     33            // how many times to oversample even values bias median low 
#define OFFSET  0 // -149.76  // calibrate out a fixed offset
#define num 1                 // ADC internal oversampling

const int analogInPin = A8;   // Analog input pin
const int ledpin = 13;

#define  LEDON digitalWriteFast(ledpin, HIGH);
#define  LEDOFF digitalWriteFast(ledpin, LOW);

void setup() {    // ==============================================================
  pinMode(analogInPin, INPUT_DISABLE); // because I don't know default at startup
  pinMode(ledpin, OUTPUT);
  analogReadAveraging(num);
  analogReadRes(12);  // Teensy 3.Xand 4.X: set ADC resolution to this many bits
  // on T4.X, Default Arduino AnalogRead(pin) takes about 6uSec---the slowest mode!
  Serial.begin(115200);      // baud rate is ignored with Teensy USB ACM i/o
  delay(1000);
  Serial.printf("# Teensy ADC test start: %dx%d oversampling \n\n", OS, num);
  delay(1000);
  srand(micros()); // use unix time as random seed
  Serial.println();

} // ==== end setup() ===========

int sort_descD(const void *cmp1, const void *cmp2)
{
  // Need to cast the void * to
  double a = *((double *)cmp1);
  double b = *((double *)cmp2);
  // The comparison
  return a > b ? -1 : (a < b ? 1 : 0);
}

#define SAMPLERATE  1000
bool usemean = false;
bool usedither = true;
bool excelformat = false;
elapsedMicros sampletimer;
uint16_t ditherwidth = 800;  // actual dither value is ditherwidth/100.0
// Added fixed timing for oversampled  acquisition
void loop() {  // ================================================================

  long datSum = 0;  // reset our accumulated sum of input values to zero
  float ctimesum;
  uint32_t startctime;
  double sMax = 0;
  double sMin = 65535;
  long n;            // count of oversampled readings in sample
  double x, mean, delta,  m2, variance, stdev; // to calculate standard deviation
  // simple user interface

  //  sumsq = 0; // initialize running squared sum of differences
  n = 0;    // have not made any ADC readings yet
  mean = 0; // start off with running mean at zero
  m2 = 0;
  ctimesum = 0; // for accumulating sum of conversion times
  sampletimer = 0;  // reset the sampling timer
  for (int i = 0; i < SAMPLES; i++) {
    // wait until time for next sample
    while (sampletimer < (1000000 / SAMPLERATE));
    sampletimer = 0;  // reset the sampling timer
    startctime = micros();
    if (usemean) {
      // mean, else median
      double sum = 0;
      // read the oversampled data in a burst
      // use led pin for scope marker to time conversion
      LEDON
      for (int i = 0; i < OS; ++i) {
        //digitalToggle(ledpin); // for scoppe timing---needs TeensyDuino 1.53
        if (usedither) { // NOTE random(min,max) can include min, but stops short of max
          sum += (analogRead(analogInPin) << 4) + random(-ditherwidth, ditherwidth + 1) / 100.0;
        } else {
          sum += (analogRead(analogInPin) << 4);
        }
      } // for
      LEDOFF
      x = (sum / OS);
    } else {
      // median, usually helps.  Only hurts with random data (which teensy isn't)
      double array[OS];
      LEDON
      for (int i = 0; i < OS; ++i) {
        //digitalToggle(ledpin);
        if (usedither) {  // NOTE random(min,max) can include min, but stops short of max
          array[i] = (analogRead(analogInPin) << 4) + random(-ditherwidth, ditherwidth + 1) / 100.0;
        } else {
          array[i] = (analogRead(analogInPin) << 4);
        }
      } // for
      LEDOFF
      qsort(array, OS, sizeof(array[0]), sort_descD);
      x = array[OS / 2];
    } // end of if(usemean)
    ctimesum += micros() - startctime;
    datSum += (x - OFFSET);
    if (x > sMax) sMax = x;
    if (x < sMin) sMin = x;

    // from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    n++;
    delta = x - mean;
    mean += delta / n;
    m2 += (delta * (x - mean));

  } // for

  variance = m2 / (n - 1); // (n-1):Sample Variance  (n): Population Variance
  stdev = sqrt(variance);  // Calculate standard deviation
  float datAvg = (1.0 * datSum) / n;
  if (excelformat) { // print tab separated for direct paste to Excel
    Serial.print(datAvg, 2); Serial.print("\t");
 //   Serial.print(datAvg - EXPECTED, 2); Serial.print("\t");
    Serial.print(sMax - sMin); Serial.print("\t");
    Serial.print(stdev, 3); Serial.print("\t");
    if (usemean) Serial.print("  mean "); else Serial.print("  med ");
    Serial.print("\t");
    if (usedither) Serial.println("   d"); else Serial.println("  nd");
  } else {  //verbose format
    Serial.print(" Avg: ");    Serial.print(datAvg, 2);
    //Serial.print(" Offset: ");  Serial.print(datAvg - EXPECTED, 2);
    Serial.print(" P-P noise: ");  Serial.print(sMax - sMin);
    Serial.print(" St.Dev: ");  Serial.print(stdev, 3);
    if (usemean) Serial.print("  mean "); else Serial.print("  med ");
    if (usedither) Serial.println("   d"); else Serial.println("  nd");
    // Serial.printf(" Avg. Conversion time: %6.2f uSec\n\n", ctimesum / n);
  }
  CheckUserInput();

  // no need to delay---10,000 samples takes long enough

} // end loop()  =====================================================

// Simple user interface to switch between mean and median of 32 samples
// and to turn dithering on and off
void CheckUserInput(void) {
  char ch;
  if (Serial.available()) {
    ch = Serial.read();
    if (ch == 'm') usemean = !usemean;
    if (ch == 'd') usedither = !usedither;
    if (ch == 'r') Showrandval();
    if (ch == 'e') excelformat = !excelformat;
  }
}

void  Showrandval(void) {
  uint32_t i, n;
  float rval;
  float rmax;
  n = 1000000;
  rmax = 0;
  for (i = 0; i < n; i++) {
    rval = random(-800, 800);
    if (rval > rmax) rmax = rval;
  }
  Serial.printf("The max of %lu calls to random(-800,800) is %5.2f\n", n, n, rmax);

  rmax = 0;
  for (i = 0; i < n; i++) {
    rval = random(-800, 801);
    if (rval > rmax) rmax = rval;
  }
  Serial.printf("The ma xof %lu calls to random(-800,801) is %5.2f\n", n, rmax);
  delay(500);
}
 
When I look at the data I collected and consider that the 12-bit samples are distributed between just two or three 12-bit values, it's no surprise that assumptions about a normal distribution are invalid. A histogram count for 2000 samples that looks like

ADC.....bin
count...value
2100....50
2101....1200
2102....750

doesn't look anything like a normal distribution. Even if you multiply by 16 and add white noise dither, it's still not normal!
With that distribution, it's also not surprising that the median (2101) is different from the mean (2101.35). Median and mode are the same, though.

Our problem with the Teensy ADC is that we just don't have enough noise! ;-)
 
Thanks for sharing, that is good work! And I agree, there isnot enough noise at the ADC input. Too bad there is no 16bit mode.

Do you have a teensy 3.2, 3.6 with which you could repeat this experiment? IIRC these do have a 16bit mode.

That 0.1uF puts the corner freq at 1.5kHz in combination with 1.1kOhm. I would expect the ADC to perform best at very low source impedance, e. g. smaller than 10 Ohm. Could repeat this test with a 1000uF or so.? Or a low noise buffer. A JFET, with grounded gate, and a 1k source to gnd resistor will work as a low noise voltage source.
 
Last edited:
Status
Not open for further replies.
Back
Top