Ben
Well-known member
I'd like to make the case for this faster 1/sqrtf() implementation again.
It's about twice as fast. The worst-case absolute error I have found so far was below 0.00005.
-Ben
And the serial output:
It's twice as fast. Worst case error I found so far was better than 0.00005 absolute.
-Ben
Code:
#include <stdint.h>
#include <string.h>
#define TESTCOUNT 10
// Number of random inputs (and results) processed in each test run.
#define TESTLENGTH 5000
// Stopwatch: set to 0 right before each timed loop and read right after it.
elapsedMicros time;
// Value read from `time` after the 1.f / sqrtf() loop (printed as microseconds).
unsigned int sqrtfTime;
// Value read from `time` after the qsqrtf() loop (printed as microseconds).
unsigned int qsqrtfTime;
/**
 * Runs the whole benchmark once at start-up.
 *
 * For each of TESTCOUNT runs it fills an array with TESTLENGTH random
 * positive floats, times 1.f / sqrtf(x) and qsqrtf(x) over that array,
 * and prints both timings, the largest absolute deviation of qsqrtf()
 * from the reference result, the input that produced it, and the ratio
 * of the two timings.
 */
void setup() {
  Serial.begin(9600);
  delay(500);

  for (int test = 0; test < TESTCOUNT; test++) {
    Serial.print("Test #"); Serial.println(test + 1);

    // ***** generating random values to test on
    // NOTE(review): the three TESTLENGTH-sized float arrays below live on the
    // stack; confirm the target board has enough RAM for them.
    randomSeed(analogRead(0));
    float randomNumber[TESTLENGTH];
    for (int i = 0; i < TESTLENGTH; i++) {
      // Start at 1 so that 0 is never drawn: 1 / sqrtf(0) is infinite and
      // would dominate the maximum-error search further down.
      randomNumber[i] = float(random(1, 1E9)) / 1E6;
    }

    // ***** calculate normal sqrtf() *****
    float normalroot[TESTLENGTH];
    time = 0;
    for (int i = 0; i < TESTLENGTH; i++) {
      normalroot[i] = 1.f / sqrtf(randomNumber[i]);
    }
    sqrtfTime = time;
    Serial.print("Computing "); Serial.print(TESTLENGTH);
    Serial.print(" inverse square roots using the normal sqrtf() function took ");
    Serial.print(sqrtfTime); Serial.println(" microseconds.");

    // ***** fast qsqrtf() *****
    float fastroot[TESTLENGTH];
    time = 0;
    for (int i = 0; i < TESTLENGTH; i++) {
      fastroot[i] = qsqrtf(randomNumber[i]);
    }
    qsqrtfTime = time;
    Serial.print("Computing "); Serial.print(TESTLENGTH);
    Serial.print(" inverse square roots using the fast qsqrtf() function took ");
    Serial.print(qsqrtfTime); Serial.println(" microseconds.");

    // ***** finding maximum error made by faster function
    // Compare magnitudes: the approximation can fall below the reference,
    // and a signed comparison would silently ignore every such deviation.
    float maxErrorFast = 0.f;
    int maxErrorIndex = 0;
    for (int i = 0; i < TESTLENGTH; i++) {
      const float absError = fabsf(fastroot[i] - normalroot[i]);
      if (absError > maxErrorFast) {
        maxErrorFast = absError;
        maxErrorIndex = i;
      }
    }
    Serial.print("maximum absolute error with fast sqrt was: ");
    Serial.println(maxErrorFast, 15);
    Serial.print("and occurred when computing qsqrtf(");
    Serial.print(randomNumber[maxErrorIndex], 15); Serial.println(").");
    Serial.print("normal sqrt was "); Serial.print(float(sqrtfTime) / float(qsqrtfTime), 4);
    Serial.println(" times slower");
    Serial.print("End of test #"); Serial.println(test + 1);
    Serial.println("");
  }
}
// The benchmark runs entirely inside setup(); the main loop only idles,
// pausing via delay(1) on every pass.
void loop()
{
  delay(1);
}
/**
 * Fast approximation of 1 / sqrt(x).
 *
 * Builds an initial guess by manipulating the IEEE-754 bit pattern of x
 * (magic constant 0x5f3759df minus half the bit pattern) and refines it
 * with a single Newton-Raphson step. The result is an approximation, not
 * a correctly rounded value.
 *
 * @param x  Value to take the inverse square root of; intended for
 *           positive, finite inputs. Zero and negative values do not
 *           produce a meaningful result.
 * @return   Approximation of 1 / sqrt(x).
 */
float qsqrtf(float x) {
  // The bit trick only makes sense when a float is exactly 32 bits wide.
  static_assert(sizeof(float) == sizeof(uint32_t),
                "qsqrtf requires a 32-bit float");

  const float halfX = x * 0.5f;

  // Copy the bits out with memcpy instead of casting pointers: this avoids
  // strict-aliasing violations and never reads more than sizeof(float) bytes,
  // regardless of how wide `long` is on the target.
  uint32_t bits;
  memcpy(&bits, &x, sizeof bits);

  // Initial guess for 1/sqrt(x) computed directly on the bit pattern.
  bits = 0x5f3759dfu - (bits >> 1);

  float y;
  memcpy(&y, &bits, sizeof y);

  // Newton-Raphson: one refinement step of y -> y * (1.5 - 0.5 * x * y * y).
  y = y * (1.5f - (halfX * y * y));
  return y;
}
And the serial output:
Code:
Test #1
Computing 5000 inverse square roots using the normal sqrtf() function took 36711 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18242 microseconds.
maximum absolute error with fast sqrt was: 0.000000000000000
and occurred when computing qsqrtf(4.941257953643799).
normal sqrt was 2.0124 times slower
End of test #1
Test #2
Computing 5000 inverse square roots using the normal sqrtf() function took 36710 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18234 microseconds.
maximum absolute error with fast sqrt was: 0.000000014901161
and occurred when computing qsqrtf(54.022491455078125).
normal sqrt was 2.0133 times slower
End of test #2
Test #3
Computing 5000 inverse square roots using the normal sqrtf() function took 36715 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18239 microseconds.
maximum absolute error with fast sqrt was: 0.000000003725290
and occurred when computing qsqrtf(864.105102539062500).
normal sqrt was 2.0130 times slower
End of test #3
Test #4
Computing 5000 inverse square roots using the normal sqrtf() function took 36711 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18248 microseconds.
maximum absolute error with fast sqrt was: 0.000000000000000
and occurred when computing qsqrtf(0.789928972721100).
normal sqrt was 2.0118 times slower
End of test #4
Test #5
Computing 5000 inverse square roots using the normal sqrtf() function took 36714 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18237 microseconds.
maximum absolute error with fast sqrt was: 0.000000000000000
and occurred when computing qsqrtf(1.512629985809326).
normal sqrt was 2.0132 times slower
End of test #5
Test #6
Computing 5000 inverse square roots using the normal sqrtf() function took 36714 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18237 microseconds.
maximum absolute error with fast sqrt was: 0.000000003725290
and occurred when computing qsqrtf(300.916290283203125).
normal sqrt was 2.0132 times slower
End of test #6
Test #7
Computing 5000 inverse square roots using the normal sqrtf() function took 36705 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18233 microseconds.
maximum absolute error with fast sqrt was: 0.000000007450581
and occurred when computing qsqrtf(75.223937988281250).
normal sqrt was 2.0131 times slower
End of test #7
Test #8
Computing 5000 inverse square roots using the normal sqrtf() function took 36708 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18241 microseconds.
maximum absolute error with fast sqrt was: 0.000000014901161
and occurred when computing qsqrtf(18.799392700195312).
normal sqrt was 2.0124 times slower
End of test #8
Test #9
Computing 5000 inverse square roots using the normal sqrtf() function took 36717 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18229 microseconds.
maximum absolute error with fast sqrt was: 0.000000029802322
and occurred when computing qsqrtf(4.698485851287842).
normal sqrt was 2.0142 times slower
End of test #9
Test #10
Computing 5000 inverse square roots using the normal sqrtf() function took 36713 microseconds.
Computing 5000 inverse square roots using the fast qsqrtf() function took 18242 microseconds.
maximum absolute error with fast sqrt was: 0.000000003725290
and occurred when computing qsqrtf(300.983123779296875).
normal sqrt was 2.0126 times slower
End of test #10