I've found this class useful for collecting statistics for a stream of data in a single pass. Simply call `update()` with each new sample and it updates the count, minimum, maximum, mean, and variance without storing the samples. Note that I've used floats for the mean and variance, but you may wish to use doubles for more precision.
Note: I call this 'Stat' rather than 'Stats' because I think of the final values as coordinates of a single point in some space, i.e. a "stat".
Code:
// Stat.h defines statistics collection for a stream of data.
// (c) 2022-2023 Shawn Silverman
#ifndef STAT_H_
#define STAT_H_
#include <algorithm>
#include <cstdint>
#include <limits>
// Holds running statistics (count, minimum, maximum, mean, and variance) for
// one stream of samples of type T. Samples are not stored; every statistic is
// updated incrementally by update().
template <typename T>
class Stat {
 public:
  // Creates and initializes the stats.
  Stat() {
    reset();
  }

  // Returns the number of samples seen since construction or the last reset().
  uint32_t count() const {
    return count_;
  }

  // Returns the current minimum. If no samples have been seen, this is
  // std::numeric_limits<T>::max().
  T min() const {
    return min_;
  }

  // Returns the current maximum. If no samples have been seen, this is
  // std::numeric_limits<T>::lowest().
  T max() const {
    return max_;
  }

  // Returns the current mean. If no samples have been seen, this is zero.
  float mean() const {
    return mean_;
  }

  // Returns the current (population) variance, m2/count. This is undefined
  // for an empty stream, in which case NaN is returned.
  float var() const {
    if (count_ == 0) {
      return std::numeric_limits<float>::quiet_NaN();
    }
    return m2_ / static_cast<float>(count_);
  }

  // Returns the current sample variance, m2/(count - 1). This is undefined
  // for fewer than two samples, in which case NaN is returned.
  float sampleVar() const {
    // Guard before subtracting: count_ is unsigned, so (count_ - 1) would
    // wrap around to a huge value when count_ is zero.
    if (count_ < 2) {
      return std::numeric_limits<float>::quiet_NaN();
    }
    return m2_ / static_cast<float>(count_ - 1);
  }

  // Resets all the stats to the "no samples seen" state.
  void reset() {
    count_ = 0;
    // Seed the extremes with the opposite ends of T's range so that the first
    // sample always replaces both. lowest() (not zero) is required so that
    // negative samples are tracked correctly by max().
    min_ = std::numeric_limits<T>::max();
    max_ = std::numeric_limits<T>::lowest();
    mean_ = 0.0f;
    m2_ = 0.0f;
  }

  // Updates the statistics with one sample.
  void update(T sample) {
    count_++;
    min_ = std::min(min_, sample);
    max_ = std::max(max_, sample);

    // Welford's online algorithm
    // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
    // Note that delta uses the old mean and the m2 update uses the new mean.
    float delta = (sample - mean_);
    mean_ += delta / static_cast<float>(count_);
    m2_ += delta*(sample - mean_);
  }

 private:
  uint32_t count_;  // Number of samples seen
  T min_;           // Smallest sample seen
  T max_;           // Largest sample seen
  float mean_;      // Running mean
  float m2_;        // Aggregates the squared distance from the mean
                    // Variance is m2/count and sample variance is m2/(count - 1)
};
#endif // STAT_H_
Note: I call this 'Stat' rather than 'Stats' because I think of the final values as coordinates of a single point in some space, i.e. a "stat".