
Originally Posted by
PaulStoffregen
Even 1 seek+read from the middle of a 16kbybe file might give useful timing if measured with the ARM cycle counter.
My gut feeling is the timing should be pretty consistent, assuming reads are done far enough apart to avoid the FlexSPI buffer and access to PSRAM isn't competing for the QSPI bandwidth.
Like developing any benchmark, experimentation and careful thought is needed when results aren't as expected. But if the timing is consistent, I'd just pick a number of iterations that lets the benchmark complete in a few seconds. No point wasting human time for diminishing returns.
Did a modification of the Bench sketch but using LittleFS_Program, and added in a random read timing loop. Write/Read blocksizes are 2048bytes and I used random to jump around the file (16k) in 2048 byte increments:
Code:
--------------------------------------------
LittleFS Test
Disk Stats:
Bytes Used: 8192, Bytes Total:6291456
Benchmark:
FILE_SIZE = 16384
BUF_SIZE = 2048 bytes
Starting write test, please wait.
write speed and latency
speed,max,min,avg
KB/Sec,usec,usec,usec
496.48,5214,3447,4086
512.00,5006,3446,3987
512.00,4018,3428,3987
496.48,4657,3557,4112
512.00,4554,3357,3987
Starting sequential read test, please wait.
read speed and latency
speed,max,min,avg
KB/Sec,usec,usec,usec
115380.28,30,8,17
303407.41,8,6,6
309132.06,7,6,6
309132.06,7,6,6
309132.06,8,6,6
Done
Starting random read test, please wait.
Number of random reads: 1
Number of blocks: 8
read speed and latency
speed,max,min,avg
Position (bytes), Block: 0, 0
Read Time (ARM Cycle Delata): 3959
KB/Sec,usec,usec,usec
1820444.50,0,0,1
Position (bytes), Block: 8192, 4
Read Time (ARM Cycle Delata): 4223
KB/Sec,usec,usec,usec
1820444.50,0,0,1
Position (bytes), Block: 10240, 5
Read Time (ARM Cycle Delata): 4707
KB/Sec,usec,usec,usec
1638400.00,0,0,1
Position (bytes), Block: 2048, 1
Read Time (ARM Cycle Delata): 3953
KB/Sec,usec,usec,usec
1820444.50,0,0,1
Position (bytes), Block: 6144, 3
Read Time (ARM Cycle Delata): 5068
KB/Sec,usec,usec,usec
1638400.00,0,0,1
Done
Ran it with and without the 133mhz change to core and really no difference. Without is below:
Code:
Starting random read test, please wait.
Number of random reads: 1
Number of blocks: 8
read speed and latency
speed,max,min,avg
Position (bytes), Block: 0, 0
Read Time (ARM Cycle Delata): 3959
KB/Sec,usec,usec,usec
1820444.50,0,0,1
Position (bytes), Block: 8192, 4
Read Time (ARM Cycle Delata): 4223
KB/Sec,usec,usec,usec
1638400.00,0,0,1
Position (bytes), Block: 10240, 5
Read Time (ARM Cycle Delata): 4707
KB/Sec,usec,usec,usec
1638400.00,0,0,1
Position (bytes), Block: 2048, 1
Read Time (ARM Cycle Delata): 3953
KB/Sec,usec,usec,usec
1820444.50,0,0,1
Position (bytes), Block: 6144, 3
Read Time (ARM Cycle Delata): 4929
KB/Sec,usec,usec,usec
1489454.50,0,0,1
Just is case I did something wrong heres the modified sketch:
Code:
#include <LittleFS.h>
#include <Streaming.h>
LittleFS_Program myfs;
#define cout Serial
char szDiskMem[] = "PRO_DISK";
// File size in bytes.
const uint32_t FILE_SIZE = 16 * 1024;
// Set SKIP_FIRST_LATENCY true if the first read/write to the SD can
// be avoid by writing a file header or reading the first record.
const bool SKIP_FIRST_LATENCY = true;
// Size of read/write.
const size_t BUF_SIZE = 2048;
// Write pass count.
const uint8_t WRITE_COUNT = 5;
// Read pass count.
const uint8_t READ_COUNT = 5;
//Block size for qspi
#define MYBLKSIZE 2048 // 2048
// Insure 4-byte alignment.
uint32_t buf32[(BUF_SIZE + 3)/4];
uint8_t* buf = (uint8_t*)buf32;
//Number of random reads
#define randomReads 1
File file, file1;
void setup() {
while (!Serial) ; // wait
Serial.println("LittleFS Test"); delay(5);
if (!myfs.begin(1024 * 1024 * 6)) {
Serial.printf("Error starting %s\n", szDiskMem);
}
//myfs.lowLevelFormat('.');
float s;
uint32_t t;
uint32_t maxLatency;
uint32_t minLatency;
uint32_t totalLatency;
bool skipLatency;
myfs.remove("bench.dat");
//for(uint8_t cnt=0; cnt < 10; cnt++) {
// fill buf with known data
if (BUF_SIZE > 1) {
for (size_t i = 0; i < (BUF_SIZE - 2); i++) {
buf[i] = 'A' + (i % 26);
}
buf[BUF_SIZE-2] = '\r';
}
buf[BUF_SIZE-1] = '\n';
Serial.println("Disk Stats:");
Serial.printf("Bytes Used: %llu, Bytes Total:%llu\n", myfs.usedSize(), myfs.totalSize());
Serial.printf("Benchmark:\n");
cout << F("FILE_SIZE = ") << FILE_SIZE << endl;
cout << F("BUF_SIZE = ") << BUF_SIZE << F(" bytes\n");
cout << F("Starting write test, please wait.") << endl << endl;
// do write test
uint32_t n = FILE_SIZE/BUF_SIZE;
cout <<F("write speed and latency") << endl;
cout << F("speed,max,min,avg") << endl;
cout << F("KB/Sec,usec,usec,usec") << endl;
// open or create file - truncate existing file.
file = myfs.open("bench.dat", FILE_WRITE);
for (uint8_t nTest = 0; nTest < WRITE_COUNT; nTest++) {
file.seek(0);
maxLatency = 0;
minLatency = 9999999;
totalLatency = 0;
skipLatency = SKIP_FIRST_LATENCY;
t = millis();
for (uint32_t i = 0; i < n; i++) {
uint32_t m = micros();
if (file.write(buf, BUF_SIZE) != BUF_SIZE) {
Serial.println("write failed");
}
m = micros() - m;
totalLatency += m;
if (skipLatency) {
// Wait until first write to SD, not just a copy to the cache.
skipLatency = file.position() < 512;
} else {
if (maxLatency < m) {
maxLatency = m;
}
if (minLatency > m) {
minLatency = m;
}
}
}
t = millis() - t;
s = file.size();
cout << s/t <<',' << maxLatency << ',' << minLatency;
cout << ',' << totalLatency/n << endl;
}
cout << endl << F("Starting sequential read test, please wait.") << endl;
cout << endl <<F("read speed and latency") << endl;
cout << F("speed,max,min,avg") << endl;
cout << F("KB/Sec,usec,usec,usec") << endl;
// do read test
for (uint8_t nTest = 0; nTest < READ_COUNT; nTest++) {
file.seek(0);
maxLatency = 0;
minLatency = 9999999;
totalLatency = 0;
skipLatency = SKIP_FIRST_LATENCY;
t = micros();
for (uint32_t i = 0; i < n; i++) {
buf[BUF_SIZE-1] = 0;
uint32_t m = micros();
int32_t nr = file.read(buf, BUF_SIZE);
if (nr != BUF_SIZE) {
Serial.println("read failed");
}
m = micros() - m;
totalLatency += m;
if (buf[BUF_SIZE-1] != '\n') {
Serial.println("data check error");
}
if (skipLatency) {
skipLatency = false;
} else {
if (maxLatency < m) {
maxLatency = m;
}
if (minLatency > m) {
minLatency = m;
}
}
}
s = file.size();
t = micros() - t;
cout << s*1000/t <<',' << maxLatency << ',' << minLatency;
cout << ',' << totalLatency/n << endl;
}
cout << endl << F("Done") << endl;
cout << endl << F("Starting random read test, please wait.") << endl;
Serial.printf("Number of random reads: %d\n", randomReads);
Serial.printf("Number of blocks: %d\n", n);
cout << endl <<F("read speed and latency") << endl;
cout << F("speed,max,min,avg") << endl;
uint32_t tt;
// do read test
for (uint8_t nTest = 0; nTest < READ_COUNT; nTest++) {
file.seek(0);
maxLatency = 0;
minLatency = 0;
totalLatency = 0;
skipLatency = SKIP_FIRST_LATENCY;
t = micros();
for (uint32_t i = 0; i < randomReads; i++) {
buf[BUF_SIZE-1] = 0;
uint32_t m = micros();
uint32_t block_pos = random(0, (n-1));
uint32_t random_pos = block_pos* MYBLKSIZE;
cout << "Position (bytes), Block: " << random_pos << ", ";
cout << block_pos << endl;
uint32_t startCNT = ARM_DWT_CYCCNT;
file.seek(random_pos);
int32_t nr = file.read(buf, BUF_SIZE);
uint32_t endCNT = ARM_DWT_CYCCNT;
cout << F("Read Time (ARM Cycle Delata): ") << endCNT-startCNT << endl;
if (nr != BUF_SIZE) {
Serial.println("read failed");
}
m = micros() - m;
totalLatency += m;
if (buf[BUF_SIZE-1] != '\n') {
Serial.println("data check error");
}
if (skipLatency) {
skipLatency = false;
} else {
if (maxLatency < m) {
maxLatency = m;
}
if (minLatency > m) {
minLatency = m;
}
}
}
s = file.size();
t = micros() - t;
cout << F("KB/Sec,usec,usec,usec") << endl;
cout << s*1000/t <<',' << maxLatency << ',' << minLatency;
cout << ',' << totalLatency/n << endl;
}
cout << endl << F("Done") << endl;
file.close();
}
void loop() {
// put your main code here, to run repeatedly:
}