Running some simple benchmarks on the Teensy 4.0 (quite impressive BTW) showed that the standard sinf() function is surprisingly (and inappropriately) slow for large arguments (>> 2 pi). Note the results are correct, just slow.
My tests generated the table below; compare the top-right value (sinf() with large arguments) against the other sine implementations. Note that sinf() does not behave this way in other environments, e.g. on the (Linux) development computer.
(If this should be directed elsewhere: which version of libm.a is used on the Teensy 4.0, and who is its author?)
Many thanks!
Details:
Board: Teensy 4.0
Arduino 1.8.13
Teensyduino 1.53
Dev environment: Linux 64
Code used (set the bigValues flag to toggle between big and small values, and/or change which sine function is called at line 35)
My tests generated the table below; compare the top-right value (sinf() with large arguments) against the other sine implementations. Note that sinf() does not behave this way in other environments, e.g. on the (Linux) development computer.
| Function | Small Arguments | Large Arguments |
|---|---|---|
| sinf() | 0.083 usec | 3.090 usec |
| sin() | 0.181 usec | 0.374 usec |
| arm_sin_f32() | 0.080 usec | 0.082 usec |
(If this should be directed elsewhere: which version of libm.a is used on the Teensy 4.0, and who is its author?)
Many thanks!
Details:
Board: Teensy 4.0
Arduino 1.8.13
Teensyduino 1.53
Dev environment: Linux 64
Code used (set the bigValues flag to toggle between big and small values, and/or change which sine function is called at line 35)
Code:
#include <arm_math.h> // only needed for the arm_sin_f32() comparison case
// ============================================================
// One-time initialization: opens the serial port at 9600 baud so that the
// benchmark figures printed with Serial.print()/println() can be read.
void setup()
{
  const int kBaudRate = 9600;  // value handed to Serial.begin()
  Serial.begin(kBaudRate);
}
// ============================================================
// One benchmark pass.
//
// Times NIter iterations of a baseline loop (argument generation plus a
// float accumulation) and NIter iterations of the same loop with one sine
// call added, then prints:
//   - the raw average time per iteration of the loop containing the sine call
//   - the average time attributed to the sine call alone (sine loop minus
//     baseline loop)
//   - the accumulated sum, so that the compiler cannot discard the work
//
// Set bigValues to choose between arguments near 1000 and arguments near 0,
// and enable exactly one of the three sine variants in the second loop.
void loop()
{
  const int NIter = 10000;      // test length
  const bool bigValues = true;  // true for "big" values, false for small
  float sum = 0;                // dummy summation (thwart optimization)

  // time of just the loop (without sine call)
  uint32_t baseStartT = micros();
  for (int i=0; i<NIter; i++) {
    float t = i * 0.00001;
    if (bigValues) t += 1000;
    sum += t;
  }
  uint32_t baseDur = micros() - baseStartT;

  // timing of loop with a sine evaluation
  uint32_t sinStartT = micros();
  for (int i=0; i<NIter; i++) {
    float t = i * 0.00001;
    if (bigValues) t += 1000;
    // using only one of the following cases
    sum += sinf(t); // float
    //sum += sin(t); // double
    //sum += arm_sin_f32(t); // float, arm optimized
  }
  uint32_t sinDur = micros() - sinStartT;

  // average time per iteration of the loop that includes the sine evaluation
  double loopPer = sinDur / double(NIter);

  // average time of just the sine evaluation.
  // The subtraction is done in double rather than uint32_t: if the baseline
  // ever measures longer than the sine loop, an unsigned difference would
  // wrap around to a value near 2^32 instead of yielding a small negative
  // number.
  double sinPer = (double(sinDur) - double(baseDur)) / double(NIter);

  // print the results
  Serial.print("raw time per iter=");
  Serial.print(loopPer, 3);
  Serial.print(" usec, time per sin eval=");
  Serial.print(sinPer, 3);
  Serial.print(" usec, sum=");
  Serial.println(sum); // make sure it's not optimized out
}
Last edited: