audioreactive optimization for -C3

Some optimizations for the ESP32-C3.

* skip every second FFT, and use interpolation instead.
* reduce the sampling rate from 22 kHz to 18 kHz
* new debug measurement for I2S cycle times (FFTTask)

--> In total, this brings the CPU load for sound processing down to 60% (from >100% previously).
This commit is contained in:
Frank
2023-01-18 18:40:37 +01:00
parent ceab107602
commit 5575d6ab11
2 changed files with 105 additions and 49 deletions

View File

@@ -225,17 +225,20 @@ static float FFT_MajorPeak = 1.0f; // FFT: strongest (peak) frequen
static float FFT_Magnitude = 0.0f; // FFT: volume (magnitude) of peak frequency static float FFT_Magnitude = 0.0f; // FFT: volume (magnitude) of peak frequency
static uint8_t fftResult[NUM_GEQ_CHANNELS]= {0};// Our calculated freq. channel result table to be used by effects static uint8_t fftResult[NUM_GEQ_CHANNELS]= {0};// Our calculated freq. channel result table to be used by effects
#if defined(WLED_DEBUG) || defined(SR_DEBUG) || defined(SR_STATS) #if defined(WLED_DEBUG) || defined(SR_DEBUG) || defined(SR_STATS)
static uint64_t fftTime = 0; static uint64_t fftTaskCycle = 0; // avg cycle time for FFT task
static uint64_t sampleTime = 0; static uint64_t fftTime = 0; // avg time for single FFT
static uint64_t sampleTime = 0; // avg (blocked) time for reading I2S samples
#endif #endif
// FFT Task variables (filtering and post-processing) // FFT Task variables (filtering and post-processing)
static float lastFftCalc[NUM_GEQ_CHANNELS] = {0.0f}; // backup of last FFT channels (before postprocessing)
static float fftCalc[NUM_GEQ_CHANNELS] = {0.0f}; // Try and normalize fftBin values to a max of 4096, so that 4096/16 = 256. static float fftCalc[NUM_GEQ_CHANNELS] = {0.0f}; // Try and normalize fftBin values to a max of 4096, so that 4096/16 = 256.
static float fftAvg[NUM_GEQ_CHANNELS] = {0.0f}; // Calculated frequency channel results, with smoothing (used if dynamics limiter is ON) static float fftAvg[NUM_GEQ_CHANNELS] = {0.0f}; // Calculated frequency channel results, with smoothing (used if dynamics limiter is ON)
#ifdef SR_DEBUG #ifdef SR_DEBUG
static float fftResultMax[NUM_GEQ_CHANNELS] = {0.0f}; // A table used for testing to determine how our post-processing is working. static float fftResultMax[NUM_GEQ_CHANNELS] = {0.0f}; // A table used for testing to determine how our post-processing is working.
#endif #endif
#if !defined(CONFIG_IDF_TARGET_ESP32C3)
// audio source parameters and constant // audio source parameters and constant
constexpr SRate_t SAMPLE_RATE = 22050; // Base sample rate in Hz - 22Khz is a standard rate. Physical sample time -> 23ms constexpr SRate_t SAMPLE_RATE = 22050; // Base sample rate in Hz - 22Khz is a standard rate. Physical sample time -> 23ms
//constexpr SRate_t SAMPLE_RATE = 16000; // 16kHz - use if FFTtask takes more than 20ms. Physical sample time -> 32ms //constexpr SRate_t SAMPLE_RATE = 16000; // 16kHz - use if FFTtask takes more than 20ms. Physical sample time -> 32ms
@@ -245,6 +248,16 @@ constexpr SRate_t SAMPLE_RATE = 22050; // Base sample rate in Hz - 22Khz
//#define FFT_MIN_CYCLE 30 // Use with 16Khz sampling //#define FFT_MIN_CYCLE 30 // Use with 16Khz sampling
//#define FFT_MIN_CYCLE 23 // minimum time before FFT task is repeated. Use with 20Khz sampling //#define FFT_MIN_CYCLE 23 // minimum time before FFT task is repeated. Use with 20Khz sampling
//#define FFT_MIN_CYCLE 46 // minimum time before FFT task is repeated. Use with 10Khz sampling //#define FFT_MIN_CYCLE 46 // minimum time before FFT task is repeated. Use with 10Khz sampling
#else
// slightly lower the sampling rate for -C3, to improve stability
//constexpr SRate_t SAMPLE_RATE = 20480; // 20Khz; Physical sample time -> 25ms
//#define FFT_MIN_CYCLE 23 // minimum time before FFT task is repeated.
constexpr SRate_t SAMPLE_RATE = 18000; // 18Khz; Physical sample time -> 28ms
#define FFT_MIN_CYCLE 25 // minimum time before FFT task is repeated.
// try 16Khz in case your device still lags and responds too slowly.
//constexpr SRate_t SAMPLE_RATE = 16000; // 16Khz -> Physical sample time -> 32ms
//#define FFT_MIN_CYCLE 30 // minimum time before FFT task is repeated.
#endif
// FFT Constants // FFT Constants
constexpr uint16_t samplesFFT = 512; // Samples in an FFT batch - This value MUST ALWAYS be a power of 2 constexpr uint16_t samplesFFT = 512; // Samples in an FFT batch - This value MUST ALWAYS be a power of 2
@@ -308,6 +321,11 @@ static float fftAddAvg(int from, int to) {
} }
#endif #endif
#if defined(CONFIG_IDF_TARGET_ESP32C3)
constexpr bool skipSecondFFT = true;
#else
constexpr bool skipSecondFFT = false;
#endif
// //
// FFT main task // FFT main task
// //
@@ -317,6 +335,8 @@ void FFTcode(void * parameter)
// see https://www.freertos.org/vtaskdelayuntil.html // see https://www.freertos.org/vtaskdelayuntil.html
const TickType_t xFrequency = FFT_MIN_CYCLE * portTICK_PERIOD_MS; const TickType_t xFrequency = FFT_MIN_CYCLE * portTICK_PERIOD_MS;
const TickType_t xFrequencyDouble = FFT_MIN_CYCLE * portTICK_PERIOD_MS * 2;
static bool isFirstRun = false;
TickType_t xLastWakeTime = xTaskGetTickCount(); TickType_t xLastWakeTime = xTaskGetTickCount();
for(;;) { for(;;) {
@@ -325,6 +345,7 @@ void FFTcode(void * parameter)
// Don't run FFT computing code if we're in Receive mode or in realtime mode // Don't run FFT computing code if we're in Receive mode or in realtime mode
if (disableSoundProcessing || (audioSyncEnabled & 0x02)) { if (disableSoundProcessing || (audioSyncEnabled & 0x02)) {
isFirstRun = false;
vTaskDelayUntil( &xLastWakeTime, xFrequency); // release CPU, and let I2S fill its buffers vTaskDelayUntil( &xLastWakeTime, xFrequency); // release CPU, and let I2S fill its buffers
continue; continue;
} }
@@ -332,6 +353,15 @@ void FFTcode(void * parameter)
#if defined(WLED_DEBUG) || defined(SR_DEBUG)|| defined(SR_STATS) #if defined(WLED_DEBUG) || defined(SR_DEBUG)|| defined(SR_STATS)
uint64_t start = esp_timer_get_time(); uint64_t start = esp_timer_get_time();
bool haveDoneFFT = false; // indicates if second measurement (FFT time) is valid bool haveDoneFFT = false; // indicates if second measurement (FFT time) is valid
// Measure the time between two consecutive starts of the FFT task loop (reported as "I2S cycle time").
// esp_timer_get_time() counts microseconds; the values below are kept in units of 10us (1/100 ms),
// despite the "InMillis" name - the reporting code divides fftTaskCycle by 100.0f to print milliseconds.
static uint64_t lastCycleStart = 0;   // start timestamp of the previous loop iteration (0 = no previous iteration yet)
static uint64_t lastLastTime = 0;     // duration of the previous cycle, averaged with the current one before smoothing
if ((lastCycleStart > 0) && (lastCycleStart < start)) { // filter out overflows
  uint64_t taskTimeInMillis = ((start - lastCycleStart) +5ULL) / 10ULL; // "+5" to ensure proper rounding
  // smart smooth: 40% mean of the last two cycle times + 60% of the previous running average.
  // The feedback term must be fftTaskCycle itself; using fftTime here would mix the
  // FFT duration average into the cycle time statistic.
  fftTaskCycle = (((taskTimeInMillis + lastLastTime)/2) *4 + fftTaskCycle*6)/10;
  lastLastTime = taskTimeInMillis;
}
lastCycleStart = start;
#endif #endif
// get a fresh batch of samples from I2S // get a fresh batch of samples from I2S
@@ -346,6 +376,7 @@ void FFTcode(void * parameter)
#endif #endif
xLastWakeTime = xTaskGetTickCount(); // update "last unblocked time" for vTaskDelay xLastWakeTime = xTaskGetTickCount(); // update "last unblocked time" for vTaskDelay
isFirstRun = !isFirstRun; // toggle throtte
#ifdef MIC_LOGGER #ifdef MIC_LOGGER
float datMin = 0.0f; float datMin = 0.0f;
@@ -399,38 +430,41 @@ void FFTcode(void * parameter)
// run FFT (takes 3-5ms on ESP32) // run FFT (takes 3-5ms on ESP32)
//if (fabsf(sampleAvg) > 0.25f) { // noise gate open //if (fabsf(sampleAvg) > 0.25f) { // noise gate open
if (fabsf(volumeSmth) > 0.25f) { // noise gate open if (fabsf(volumeSmth) > 0.25f) { // noise gate open
if ((skipSecondFFT == false) || (isFirstRun == true)) {
// run FFT (takes 2-3ms on ESP32, ~12ms on ESP32-S2, ~30ms on -C3)
#ifdef UM_AUDIOREACTIVE_USE_NEW_FFT
FFT.dcRemoval(); // remove DC offset
#if !defined(FFT_PREFER_EXACT_PEAKS)
FFT.windowing( FFTWindow::Flat_top, FFTDirection::Forward); // Weigh data using "Flat Top" function - better amplitude accuracy
#else
FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward); // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection
#endif
FFT.compute( FFTDirection::Forward ); // Compute FFT
FFT.complexToMagnitude(); // Compute magnitudes
#else
FFT.DCRemoval(); // let FFT lib remove DC component, so we don't need to care about this in getSamples()
// run FFT (takes 3-5ms on ESP32, ~12ms on ESP32-S2) //FFT.Windowing( FFT_WIN_TYP_HAMMING, FFT_FORWARD ); // Weigh data - standard Hamming window
#ifdef UM_AUDIOREACTIVE_USE_NEW_FFT //FFT.Windowing( FFT_WIN_TYP_BLACKMAN, FFT_FORWARD ); // Blackman window - better side freq rejection
FFT.dcRemoval(); // remove DC offset #if !defined(FFT_PREFER_EXACT_PEAKS)
#if !defined(FFT_PREFER_EXACT_PEAKS) FFT.Windowing( FFT_WIN_TYP_FLT_TOP, FFT_FORWARD ); // Flat Top Window - better amplitude accuracy
FFT.windowing( FFTWindow::Flat_top, FFTDirection::Forward); // Weigh data using "Flat Top" function - better amplitude accuracy #else
#else FFT.Windowing( FFT_WIN_TYP_BLACKMAN_HARRIS, FFT_FORWARD );// Blackman-Harris - excellent sideband rejection
FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward); // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection #endif
#endif FFT.Compute( FFT_FORWARD ); // Compute FFT
FFT.compute( FFTDirection::Forward ); // Compute FFT FFT.ComplexToMagnitude(); // Compute magnitudes
FFT.complexToMagnitude(); // Compute magnitudes #endif
#else
FFT.DCRemoval(); // let FFT lib remove DC component, so we don't need to care about this in getSamples()
//FFT.Windowing( FFT_WIN_TYP_HAMMING, FFT_FORWARD ); // Weigh data - standard Hamming window #ifdef UM_AUDIOREACTIVE_USE_NEW_FFT
//FFT.Windowing( FFT_WIN_TYP_BLACKMAN, FFT_FORWARD ); // Blackman window - better side freq rejection FFT.majorPeak(FFT_MajorPeak, FFT_Magnitude); // let the effects know which freq was most dominant
#if !defined(FFT_PREFER_EXACT_PEAKS) #else
FFT.Windowing( FFT_WIN_TYP_FLT_TOP, FFT_FORWARD ); // Flat Top Window - better amplitude accuracy FFT.MajorPeak(&FFT_MajorPeak, &FFT_Magnitude); // let the effects know which freq was most dominant
#else #endif
FFT.Windowing( FFT_WIN_TYP_BLACKMAN_HARRIS, FFT_FORWARD );// Blackman-Harris - excellent sideband rejection FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f); // restrict value to range expected by effects
#endif
FFT.Compute( FFT_FORWARD ); // Compute FFT
FFT.ComplexToMagnitude(); // Compute magnitudes
#endif
#ifdef UM_AUDIOREACTIVE_USE_NEW_FFT
FFT.majorPeak(FFT_MajorPeak, FFT_Magnitude); // let the effects know which freq was most dominant
#else
FFT.MajorPeak(&FFT_MajorPeak, &FFT_Magnitude); // let the effects know which freq was most dominant
#endif
FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f); // restrict value to range expected by effects
} else { // skip second run --> clear fft results, keep peaks
memset(vReal, 0, sizeof(vReal));
}
#if defined(WLED_DEBUG) || defined(SR_DEBUG) || defined(SR_STATS) #if defined(WLED_DEBUG) || defined(SR_DEBUG) || defined(SR_STATS)
haveDoneFFT = true; haveDoneFFT = true;
#endif #endif
@@ -441,14 +475,16 @@ void FFTcode(void * parameter)
FFT_Magnitude = 0.001; FFT_Magnitude = 0.001;
} }
for (int i = 0; i < samplesFFT; i++) { if ((skipSecondFFT == false) || (isFirstRun == true)) {
float t = fabsf(vReal[i]); // just to be sure - values in fft bins should be positive any way
vReal[i] = t / 16.0f; // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
} // for()
// mapping of FFT result bins to frequency channels for (int i = 0; i < samplesFFT; i++) {
//if (fabsf(sampleAvg) > 0.25f) { // noise gate open float t = fabsf(vReal[i]); // just to be sure - values in fft bins should be positive any way
if (fabsf(volumeSmth) > 0.25f) { // noise gate open vReal[i] = t / 16.0f; // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
} // for()
// mapping of FFT result bins to frequency channels
//if (fabsf(sampleAvg) > 0.25f) { // noise gate open
if (fabsf(volumeSmth) > 0.25f) { // noise gate open
#if 0 #if 0
/* This FFT post processing is a DIY endeavour. What we really need is someone with sound engineering expertise to do a great job here AND most importantly, that the animations look GREAT as a result. /* This FFT post processing is a DIY endeavour. What we really need is someone with sound engineering expertise to do a great job here AND most importantly, that the animations look GREAT as a result.
* *
@@ -506,24 +542,34 @@ void FFTcode(void * parameter)
fftCalc[13] = fftAddAvg(86,104); // 18 3704 - 4479 high mid fftCalc[13] = fftAddAvg(86,104); // 18 3704 - 4479 high mid
fftCalc[14] = fftAddAvg(104,165) * 0.88f; // 61 4479 - 7106 high mid + high -- with slight damping fftCalc[14] = fftAddAvg(104,165) * 0.88f; // 61 4479 - 7106 high mid + high -- with slight damping
#endif #endif
} else { // noise gate closed - just decay old values } else { // noise gate closed - just decay old values
for (int i=0; i < NUM_GEQ_CHANNELS; i++) { isFirstRun = false;
fftCalc[i] *= 0.85f; // decay to zero for (int i=0; i < NUM_GEQ_CHANNELS; i++) {
if (fftCalc[i] < 4.0f) fftCalc[i] = 0.0f; fftCalc[i] *= 0.85f; // decay to zero
if (fftCalc[i] < 4.0f) fftCalc[i] = 0.0f;
}
} }
memcpy(lastFftCalc, fftCalc, sizeof(lastFftCalc)); // make a backup of last "good" channels
} else { // if second run skipped
memcpy(fftCalc, lastFftCalc, sizeof(fftCalc)); // restore last "good" channels
} }
// post-processing of frequency channels (pink noise adjustment, AGC, smooting, scaling) // post-processing of frequency channels (pink noise adjustment, AGC, smooting, scaling)
if (pinkIndex > MAX_PINK) pinkIndex = MAX_PINK; if (pinkIndex > MAX_PINK) pinkIndex = MAX_PINK;
//postProcessFFTResults((fabsf(sampleAvg) > 0.25f)? true : false , NUM_GEQ_CHANNELS); //postProcessFFTResults((fabsf(sampleAvg) > 0.25f)? true : false , NUM_GEQ_CHANNELS);
postProcessFFTResults((fabsf(volumeSmth)>0.25f)? true : false , NUM_GEQ_CHANNELS); postProcessFFTResults((fabsf(volumeSmth)>0.25f)? true : false , NUM_GEQ_CHANNELS); // this function modifies fftCalc, fftAvg and fftResult
#if defined(WLED_DEBUG) || defined(SR_DEBUG)|| defined(SR_STATS) #if defined(WLED_DEBUG) || defined(SR_DEBUG)|| defined(SR_STATS)
static uint64_t lastLastFFT = 0;
if (haveDoneFFT && (start < esp_timer_get_time())) { // filter out overflows if (haveDoneFFT && (start < esp_timer_get_time())) { // filter out overflows
uint64_t fftTimeInMillis = ((esp_timer_get_time() - start) +5ULL) / 10ULL; // "+5" to ensure proper rounding uint64_t fftTimeInMillis = ((esp_timer_get_time() - start) +5ULL) / 10ULL; // "+5" to ensure proper rounding
fftTime = (fftTimeInMillis*3 + fftTime*7)/10; // smooth fftTime = (((fftTimeInMillis + lastLastFFT)/2) *3 + fftTime*7)/10; // smart smooth
lastLastFFT = fftTimeInMillis;
} }
#endif #endif
// run peak detection // run peak detection
autoResetPeak(); autoResetPeak();
detectSamplePeak(); detectSamplePeak();
@@ -531,8 +577,13 @@ void FFTcode(void * parameter)
#if !defined(I2S_GRAB_ADC1_COMPLETELY) #if !defined(I2S_GRAB_ADC1_COMPLETELY)
if ((audioSource == nullptr) || (audioSource->getType() != AudioSource::Type_I2SAdc)) // the "delay trick" does not help for analog ADC if ((audioSource == nullptr) || (audioSource->getType() != AudioSource::Type_I2SAdc)) // the "delay trick" does not help for analog ADC
#endif #endif
vTaskDelayUntil( &xLastWakeTime, xFrequency); // release CPU, and let I2S fill its buffers {
if ((skipSecondFFT == false) || (fabsf(volumeSmth) < 0.25f)) {
vTaskDelayUntil( &xLastWakeTime, xFrequency); // release CPU, and let I2S fill its buffers
} else if (isFirstRun == true) {
vTaskDelayUntil( &xLastWakeTime, xFrequencyDouble); // release CPU after performing FFT in "skip second run" mode
}
}
} // for(;;)ever } // for(;;)ever
} // FFTcode() task end } // FFTcode() task end
@@ -1774,6 +1825,10 @@ class AudioReactive : public Usermod {
} }
#if defined(WLED_DEBUG) || defined(SR_DEBUG) || defined(SR_STATS) #if defined(WLED_DEBUG) || defined(SR_DEBUG) || defined(SR_STATS)
infoArr = user.createNestedArray(F("I2S cycle time"));
infoArr.add(float(fftTaskCycle)/100.0f);
infoArr.add(" ms");
infoArr = user.createNestedArray(F("Sampling time")); infoArr = user.createNestedArray(F("Sampling time"));
infoArr.add(float(sampleTime)/100.0f); infoArr.add(float(sampleTime)/100.0f);
infoArr.add(" ms"); infoArr.add(" ms");
@@ -1787,8 +1842,9 @@ class AudioReactive : public Usermod {
else else
infoArr.add(" ms"); infoArr.add(" ms");
DEBUGSR_PRINTF("AR Sampling time: %5.2f ms\n", float(sampleTime)/100.0f); DEBUGSR_PRINTF("AR I2S cycle time: %5.2f ms\n", float(fftTaskCycle)/100.0f);
DEBUGSR_PRINTF("AR FFT time : %5.2f ms\n", float(fftTime)/100.0f); DEBUGSR_PRINTF("AR Sampling time : %5.2f ms\n", float(sampleTime)/100.0f);
DEBUGSR_PRINTF("AR FFT time : %5.2f ms\n", float(fftTime)/100.0f);
#endif #endif
} }
} }

View File

@@ -23,7 +23,7 @@
// see https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/hw-reference/chip-series-comparison.html#related-documents // see https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/hw-reference/chip-series-comparison.html#related-documents
// and https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/api-reference/peripherals/i2s.html#overview-of-all-modes // and https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/api-reference/peripherals/i2s.html#overview-of-all-modes
#if defined(CONFIG_IDF_TARGET_ESP32C2) || defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C5) || defined(CONFIG_IDF_TARGET_ESP32C6) || defined(CONFIG_IDF_TARGET_ESP32H2) || defined(ESP8266) || defined(ESP8265) #if defined(CONFIG_IDF_TARGET_ESP32C2) || defined(CONFIG_IDF_TARGET_ESP32C5) || defined(CONFIG_IDF_TARGET_ESP32C6) || defined(CONFIG_IDF_TARGET_ESP32H2) || defined(ESP8266) || defined(ESP8265)
// there are two things in these MCUs that could lead to problems with audio processing: // there are two things in these MCUs that could lead to problems with audio processing:
// * no floating point hardware (FPU) support - FFT uses float calculations. If done in software, a strong slow-down can be expected (between 8x and 20x) // * no floating point hardware (FPU) support - FFT uses float calculations. If done in software, a strong slow-down can be expected (between 8x and 20x)
// * single core, so FFT task might slow down other things like LED updates // * single core, so FFT task might slow down other things like LED updates