Poor performance X Benchmark
Posted: Fri Jan 08, 2021 4:42 pm
Hi,
On Wikipedia, the information regarding processor speed is:
CPU: Xtensa dual-core (or single-core) 32-bit LX6 microprocessor, operating at 160 or 240 MHz and performing at up to 600 DMIPS
I prepared a small program to test CPU speed and got strange results. Because of this, I have two questions:
1 – Why does the IRAM function with DRAM variables have the worst performance?
2 – Why is the number of instructions per microsecond so far from the 240 or 600 stated above?
Below is the code:
Results:
Hello Processor!
This is ESP32 chip with 2 CPU cores, WiFi/BT/BLE, silicon revision 1, 4MB external flash
Rtc speed rtc_clk_xtal_freq_get 40
Processor speed 240
Overhead f_Iram_Internal_Calc_Cpu=1
Time execution Iram + internal variable=41701, instructions per microsecond=71
Overhead f_Iram_Dram_Calc_Cpu=1
Time execution Iram + Dram variable=79231, instructions per microsecond=37
Overhead f_Regular_DRAM_Calc_Cpu=4
Time execution Regular + DRAM variable=79203, instructions per microsecond=37
Overhead f_Regular_Internal_Calc_Cpu=1
Time execution Regular + Internal variable=41688, instructions per microsecond=71
On Wikipedia, the information regarding processor speed is:
CPU: Xtensa dual-core (or single-core) 32-bit LX6 microprocessor, operating at 160 or 240 MHz and performing at up to 600 DMIPS
I prepared a small program to test CPU speed and got strange results. Because of this, I have two questions:
1 – Why does the IRAM function with DRAM variables have the worst performance?
2 – Why is the number of instructions per microsecond so far from the 240 or 600 stated above?
Below is the code:
Code: Select all
#include <stdio.h>
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_system.h"
#include "esp_spi_flash.h"
#include "esp_pm.h"
#include "esp_attr.h"
#include "esp32/clk.h"
/*
 * Human-readable chip name printed by app_main(), selected from the build
 * target. NOTE: CHIP_NAME stays undefined when neither target macro below
 * is defined.
 */
#ifdef CONFIG_IDF_TARGET_ESP32
#define CHIP_NAME "ESP32"
#endif
#ifdef CONFIG_IDF_TARGET_ESP32S2BETA
#define CHIP_NAME "ESP32-S2 Beta"
#endif
/*
 * Benchmark assumption (an estimate by the author, not a measured value):
 * each iteration of the timed while loops below is expected to compile to
 * roughly three instructions:
 *   - one instruction to add 1 to the counter,
 *   - one instruction to compare it with the limit,
 *   - one instruction to branch back to the top of the loop.
 */
/* Numerator of the printed "instructions per microsecond" figure:
 * LOOP_ONE_MILLION iterations times the three assumed instructions. */
#define THREE_MILLION_INSTRUCTIONS 3000000
/* Number of iterations of every timed loop; app_main() also uses it as the
 * divisor that scales the CPU frequency before printing it. */
#define LOOP_ONE_MILLION 1000000
/*
 * Benchmark variant: function tagged IRAM_ATTR, loop counter held in a local
 * (automatic) variable.
 *
 * Prints two lines with printf:
 *   1. the cost, in microseconds, of two back-to-back esp_timer_get_time()
 *      calls (the "overhead");
 *   2. the time taken by LOOP_ONE_MILLION counter increments with that
 *      overhead subtracted, and THREE_MILLION_INSTRUCTIONS divided by that
 *      time. The second figure is only an estimate: it assumes three
 *      instructions per loop iteration.
 *
 * If the corrected elapsed time is zero or negative (for instance because the
 * compiler removed the loop, whose counter is not volatile), the rate is
 * reported as 0 instead of dividing by zero.
 */
void IRAM_ATTR f_Iram_Internal_Calc_Cpu(void)
{
    int I_Ind;
    int64_t I_Time_exec;   /* esp_timer_get_time() returns a signed 64-bit value */
    int I_Int_Val, I_over;
    int I_rate;

    I_Ind = 0;

    /* Measure the cost of reading the timer itself. */
    I_Time_exec = esp_timer_get_time();
    I_over = (int)(esp_timer_get_time() - I_Time_exec);
    printf("Overhead f_Iram_Internal_Calc_Cpu=%d\n",I_over);

    /* Timed section: count from 0 up to LOOP_ONE_MILLION. */
    I_Time_exec = esp_timer_get_time();
    while (I_Ind < LOOP_ONE_MILLION)
    {
        I_Ind++;
    }
    I_Int_Val = (int)(esp_timer_get_time() - I_Time_exec - I_over);

    /* Guard the division: a non-positive elapsed time has no meaningful rate. */
    I_rate = (I_Int_Val > 0) ? (THREE_MILLION_INSTRUCTIONS / I_Int_Val) : 0;
    printf("Time execution Iram + internal variable=%d, instructions per microsecond=%d\n",I_Int_Val, I_rate);
}
/*
 * File-scope state tagged DRAM_ATTR, shared by f_Iram_Dram_Calc_Cpu() and
 * f_Regular_DRAM_Calc_Cpu(): loop counter, start timestamp, corrected elapsed
 * time and timer-read overhead (both in microseconds).
 */
DRAM_ATTR int D_Ind;
DRAM_ATTR uint64_t D_Time_exec;
DRAM_ATTR int D_Int_Val, D_over;

/*
 * Benchmark variant: function tagged IRAM_ATTR, loop counter and timing
 * values held in the DRAM_ATTR globals above.
 *
 * Prints two lines with printf:
 *   1. the cost, in microseconds, of two back-to-back esp_timer_get_time()
 *      calls (the "overhead");
 *   2. the time taken by LOOP_ONE_MILLION counter increments with that
 *      overhead subtracted, and THREE_MILLION_INSTRUCTIONS divided by that
 *      time. The second figure is only an estimate: it assumes three
 *      instructions per loop iteration.
 *
 * If the corrected elapsed time is zero or negative, the rate is reported as
 * 0 instead of dividing by zero.
 */
void IRAM_ATTR f_Iram_Dram_Calc_Cpu(void)
{
    D_Ind = 0;

    /* Measure the cost of reading the timer itself. */
    D_Time_exec = esp_timer_get_time();
    D_over = (int)(esp_timer_get_time() - D_Time_exec);
    printf("Overhead f_Iram_Dram_Calc_Cpu=%d\n",D_over);

    /* Timed section: count from 0 up to LOOP_ONE_MILLION. */
    D_Time_exec = esp_timer_get_time();
    while (D_Ind < LOOP_ONE_MILLION)
    {
        D_Ind++;
    }
    D_Int_Val = (int)(esp_timer_get_time() - D_Time_exec - D_over);

    /* Guard the division: a non-positive elapsed time has no meaningful rate. */
    printf("Time execution Iram + Dram variable=%d, instructions per microsecond=%d\n",D_Int_Val,
           (D_Int_Val > 0) ? (THREE_MILLION_INSTRUCTIONS / D_Int_Val) : 0);
}
/*
 * Benchmark variant: function without a placement attribute, loop counter and
 * timing values held in the file-scope globals D_Ind, D_Time_exec, D_Int_Val
 * and D_over.
 *
 * Prints two lines with printf:
 *   1. the cost, in microseconds, of two back-to-back esp_timer_get_time()
 *      calls (the "overhead");
 *   2. the time taken by LOOP_ONE_MILLION counter increments with that
 *      overhead subtracted, and THREE_MILLION_INSTRUCTIONS divided by that
 *      time. The second figure is only an estimate: it assumes three
 *      instructions per loop iteration.
 *
 * If the corrected elapsed time is zero or negative, the rate is reported as
 * 0 instead of dividing by zero.
 */
void f_Regular_DRAM_Calc_Cpu(void)
{
    D_Ind = 0;

    /* Measure the cost of reading the timer itself. */
    D_Time_exec = esp_timer_get_time();
    D_over = (int)(esp_timer_get_time() - D_Time_exec);
    printf("Overhead f_Regular_DRAM_Calc_Cpu=%d\n",D_over);

    /* Timed section: count from 0 up to LOOP_ONE_MILLION. */
    D_Time_exec = esp_timer_get_time();
    while (D_Ind < LOOP_ONE_MILLION)
    {
        D_Ind++;
    }
    D_Int_Val = (int)(esp_timer_get_time() - D_Time_exec - D_over);

    /* Guard the division: a non-positive elapsed time has no meaningful rate. */
    printf("Time execution Regular + DRAM variable=%d, instructions per microsecond=%d\n",D_Int_Val,
           (D_Int_Val > 0) ? (THREE_MILLION_INSTRUCTIONS / D_Int_Val) : 0);
}
/*
 * Benchmark variant: function without a placement attribute, loop counter
 * held in a local (automatic) variable.
 *
 * Prints two lines with printf:
 *   1. the cost, in microseconds, of two back-to-back esp_timer_get_time()
 *      calls (the "overhead");
 *   2. the time taken by LOOP_ONE_MILLION counter increments with that
 *      overhead subtracted, and THREE_MILLION_INSTRUCTIONS divided by that
 *      time. The second figure is only an estimate: it assumes three
 *      instructions per loop iteration.
 *
 * If the corrected elapsed time is zero or negative (for instance because the
 * compiler removed the loop, whose counter is not volatile), the rate is
 * reported as 0 instead of dividing by zero.
 */
void f_Regular_Internal_Calc_Cpu(void)
{
    int N_Ind;
    int64_t N_Time_exec;   /* esp_timer_get_time() returns a signed 64-bit value */
    int N_Int_Val, N_over;
    int N_rate;

    N_Ind = 0;

    /* Measure the cost of reading the timer itself. */
    N_Time_exec = esp_timer_get_time();
    N_over = (int)(esp_timer_get_time() - N_Time_exec);
    printf("Overhead f_Regular_Internal_Calc_Cpu=%d\n",N_over);

    /* Timed section: count from 0 up to LOOP_ONE_MILLION. */
    N_Time_exec = esp_timer_get_time();
    while (N_Ind < LOOP_ONE_MILLION)
    {
        N_Ind++;
    }
    N_Int_Val = (int)(esp_timer_get_time() - N_Time_exec - N_over);

    /* Guard the division: a non-positive elapsed time has no meaningful rate. */
    N_rate = (N_Int_Val > 0) ? (THREE_MILLION_INSTRUCTIONS / N_Int_Val) : 0;
    printf("Time execution Regular + Internal variable=%d, instructions per microsecond=%d\n",N_Int_Val, N_rate);
}
/*
 * Application entry point.
 *
 * Prints a greeting and the chip description (name, core count, radio
 * features, silicon revision, flash size and kind), then the value returned
 * by rtc_clk_xtal_freq_get() and the CPU frequency scaled from Hz to MHz.
 * Afterwards it runs the four benchmark functions in order and finally loops
 * forever, printing "Loop" and delaying for 60000 ms worth of ticks between
 * prints. Never returns.
 */
void app_main(void)
{
    /* Divisor that turns the Hz value from esp_clk_cpu_freq() into MHz. */
    const int hz_per_mhz = 1000000;
    int Int_Val;

    printf("Hello Processor!\n");

    /* Print chip information */
    esp_chip_info_t chip_info;
    esp_chip_info(&chip_info);
    printf("This is %s chip with %d CPU cores, WiFi%s%s, ",
           CHIP_NAME,
           chip_info.cores,
           (chip_info.features & CHIP_FEATURE_BT) ? "/BT" : "",
           (chip_info.features & CHIP_FEATURE_BLE) ? "/BLE" : "");
    printf("silicon revision %d, ", chip_info.revision);

    /* The flash size is a size_t; convert it explicitly so the argument
     * matches the %d conversion. */
    printf("%dMB %s flash\n", (int)(spi_flash_get_chip_size() / (1024 * 1024)),
           (chip_info.features & CHIP_FEATURE_EMB_FLASH) ? "embedded" : "external");

    /* The processing speed is displayed next. */
    Int_Val = rtc_clk_xtal_freq_get();
    printf("\nRtc speed rtc_clk_xtal_freq_get %d\n", Int_Val);
    Int_Val = esp_clk_cpu_freq() / hz_per_mhz;
    printf("Processor speed %d\n", Int_Val);

    /* Run the four benchmark variants, separated by blank lines. */
    printf("\n\n");
    f_Iram_Internal_Calc_Cpu();
    printf("\n\n");
    f_Iram_Dram_Calc_Cpu();
    printf("\n\n");
    f_Regular_DRAM_Calc_Cpu();
    printf("\n\n");
    f_Regular_Internal_Calc_Cpu();

    /* Keep the task alive, printing a heartbeat once per delay period. */
    for (;;)
    {
        printf("Loop\n");
        vTaskDelay(60000 / portTICK_PERIOD_MS);
    }
}
Results:
Hello Processor!
This is ESP32 chip with 2 CPU cores, WiFi/BT/BLE, silicon revision 1, 4MB external flash
Rtc speed rtc_clk_xtal_freq_get 40
Processor speed 240
Overhead f_Iram_Internal_Calc_Cpu=1
Time execution Iram + internal variable=41701, instructions per microsecond=71
Overhead f_Iram_Dram_Calc_Cpu=1
Time execution Iram + Dram variable=79231, instructions per microsecond=37
Overhead f_Regular_DRAM_Calc_Cpu=4
Time execution Regular + DRAM variable=79203, instructions per microsecond=37
Overhead f_Regular_Internal_Calc_Cpu=1
Time execution Regular + Internal variable=41688, instructions per microsecond=71