audioreactive optimization for -C3

Some optimizations for the ESP32-C3: * skip every second FFT and use interpolation instead; * reduce the sampling rate from 22 kHz to 18 kHz; * add a new debug measurement for I2S cycle times (FFT task). In total, this brings the CPU load for sound processing down to 60% (from >100% previously).
2023-01-18 18:40:37 +01:00
parent ceab107602
commit 5575d6ab11
2 changed files with 105 additions and 49 deletions
--- a/usermods/audioreactive/audio_reactive.h
+++ b/usermods/audioreactive/audio_reactive.h
@@ -225,17 +225,20 @@ static float FFT_MajorPeak = 1.0f;              // FFT: strongest (peak) frequen
 static float FFT_Magnitude = 0.0f;              // FFT: volume (magnitude) of peak frequency
 static uint8_t fftResult[NUM_GEQ_CHANNELS]= {0};// Our calculated freq. channel result table to be used by effects
 #if defined(WLED_DEBUG) || defined(SR_DEBUG) || defined(SR_STATS)
-static uint64_t fftTime = 0;
-static uint64_t sampleTime = 0;
+static uint64_t fftTaskCycle = 0;      // avg cycle time for FFT task
+static uint64_t fftTime = 0;           // avg time for single FFT
+static uint64_t sampleTime = 0;        // avg (blocked) time for reading I2S samples
 #endif

 // FFT Task variables (filtering and post-processing)
+static float   lastFftCalc[NUM_GEQ_CHANNELS] = {0.0f};                // backup of last FFT channels (before postprocessing)
 static float   fftCalc[NUM_GEQ_CHANNELS] = {0.0f};                    // Try and normalize fftBin values to a max of 4096, so that 4096/16 = 256.
 static float   fftAvg[NUM_GEQ_CHANNELS] = {0.0f};                     // Calculated frequency channel results, with smoothing (used if dynamics limiter is ON)
 #ifdef SR_DEBUG
 static float   fftResultMax[NUM_GEQ_CHANNELS] = {0.0f};               // A table used for testing to determine how our post-processing is working.
 #endif

+#if !defined(CONFIG_IDF_TARGET_ESP32C3)
 // audio source parameters and constant
 constexpr SRate_t SAMPLE_RATE = 22050;        // Base sample rate in Hz - 22Khz is a standard rate. Physical sample time -> 23ms
 //constexpr SRate_t SAMPLE_RATE = 16000;        // 16kHz - use if FFTtask takes more than 20ms. Physical sample time -> 32ms
@@ -245,6 +248,16 @@ constexpr SRate_t SAMPLE_RATE = 22050;        // Base sample rate in Hz - 22Khz
 //#define FFT_MIN_CYCLE 30                      // Use with 16Khz sampling
 //#define FFT_MIN_CYCLE 23                      // minimum time before FFT task is repeated. Use with 20Khz sampling
 //#define FFT_MIN_CYCLE 46                      // minimum time before FFT task is repeated. Use with 10Khz sampling
+#else
+// slightly lower the sampling rate for -C3, to improve stability
+//constexpr SRate_t SAMPLE_RATE = 20480;        // 20Khz; Physical sample time -> 25ms
+//#define FFT_MIN_CYCLE 23                      // minimum time before FFT task is repeated.
+constexpr SRate_t SAMPLE_RATE = 18000;          // 18Khz; Physical sample time -> 28ms
+#define FFT_MIN_CYCLE 25                        // minimum time before FFT task is repeated.
+// try 16Khz in case your device still lags and responds too slowly.
+//constexpr SRate_t SAMPLE_RATE = 16000;        // 16Khz -> Physical sample time -> 32ms
+//#define FFT_MIN_CYCLE 30                      // minimum time before FFT task is repeated.
+#endif

 // FFT Constants
 constexpr uint16_t samplesFFT = 512;            // Samples in an FFT batch - This value MUST ALWAYS be a power of 2
@@ -308,6 +321,11 @@ static float fftAddAvg(int from, int to) {
 }
 #endif

+#if defined(CONFIG_IDF_TARGET_ESP32C3)
+constexpr bool skipSecondFFT = true;    // ESP32-C3: FFTcode() computes the FFT only on every second cycle and reuses the last channel values in between
+#else
+constexpr bool skipSecondFFT = false;   // all other targets: FFTcode() computes the FFT on every cycle
+#endif
 //
 // FFT main task
 //
@@ -317,6 +335,8 @@ void FFTcode(void * parameter)

  // see https://www.freertos.org/vtaskdelayuntil.html
  const TickType_t xFrequency = FFT_MIN_CYCLE * portTICK_PERIOD_MS;  
+  const TickType_t xFrequencyDouble = FFT_MIN_CYCLE * portTICK_PERIOD_MS * 2;  
+  static bool isFirstRun = false;

  TickType_t xLastWakeTime = xTaskGetTickCount();
  for(;;) {
@@ -325,6 +345,7 @@ void FFTcode(void * parameter)

    // Don't run FFT computing code if we're in Receive mode or in realtime mode
    if (disableSoundProcessing || (audioSyncEnabled & 0x02)) {
+      isFirstRun = false;
      vTaskDelayUntil( &xLastWakeTime, xFrequency);        // release CPU, and let I2S fill its buffers
      continue;
    }
@@ -332,6 +353,15 @@ void FFTcode(void * parameter)
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)|| defined(SR_STATS)
    uint64_t start = esp_timer_get_time();
    bool haveDoneFFT = false; // indicates if second measurement (FFT time) is valid
+
+    static uint64_t lastCycleStart = 0;   // timestamp taken at the start of the previous cycle (0 = nothing recorded yet)
+    static uint64_t lastLastTime = 0;     // cycle duration measured in the previous iteration
+    if ((lastCycleStart > 0) && (lastCycleStart < start)) { // filter out overflows
+      uint64_t taskTimeInMillis = ((start - lastCycleStart) +5ULL) / 10ULL; // "+5" to ensure proper rounding
+      fftTaskCycle = (((taskTimeInMillis + lastLastTime)/2) *4 + fftTaskCycle*6)/10; // smart smooth: 40% mean of the last two cycles, 60% previous average (must feed back fftTaskCycle, not fftTime)
+      lastLastTime = taskTimeInMillis;
+    }
+    lastCycleStart = start;
 #endif

    // get a fresh batch of samples from I2S
@@ -346,6 +376,7 @@ void FFTcode(void * parameter)
 #endif

    xLastWakeTime = xTaskGetTickCount();       // update "last unblocked time" for vTaskDelay
+    isFirstRun = !isFirstRun; //  toggle throttle

 #ifdef MIC_LOGGER
    float datMin = 0.0f;
@@ -399,38 +430,41 @@ void FFTcode(void * parameter)
    // run FFT (takes 3-5ms on ESP32)
    //if (fabsf(sampleAvg) > 0.25f) { // noise gate open
    if (fabsf(volumeSmth) > 0.25f) { // noise gate open
+      if ((skipSecondFFT == false) || (isFirstRun == true)) {
+        // run FFT (takes 2-3ms on ESP32, ~12ms on ESP32-S2, ~30ms on -C3)
+        #ifdef UM_AUDIOREACTIVE_USE_NEW_FFT
+        FFT.dcRemoval();                                            // remove DC offset
+        #if !defined(FFT_PREFER_EXACT_PEAKS)
+          FFT.windowing( FFTWindow::Flat_top, FFTDirection::Forward);        // Weigh data using "Flat Top" function - better amplitude accuracy
+        #else
+          FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward);  // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection
+        #endif
+        FFT.compute( FFTDirection::Forward );                       // Compute FFT
+        FFT.complexToMagnitude();                                   // Compute magnitudes
+        #else
+        FFT.DCRemoval(); // let FFT lib remove DC component, so we don't need to care about this in getSamples()

-      // run FFT (takes 3-5ms on ESP32, ~12ms on ESP32-S2)
-#ifdef UM_AUDIOREACTIVE_USE_NEW_FFT
-      FFT.dcRemoval();                                            // remove DC offset
-      #if !defined(FFT_PREFER_EXACT_PEAKS)
-        FFT.windowing( FFTWindow::Flat_top, FFTDirection::Forward);        // Weigh data using "Flat Top" function - better amplitude accuracy
-      #else
-        FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward);  // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection
-      #endif
-      FFT.compute( FFTDirection::Forward );                       // Compute FFT
-      FFT.complexToMagnitude();                                   // Compute magnitudes
-#else
-      FFT.DCRemoval(); // let FFT lib remove DC component, so we don't need to care about this in getSamples()
+        //FFT.Windowing( FFT_WIN_TYP_HAMMING, FFT_FORWARD );        // Weigh data - standard Hamming window
+        //FFT.Windowing( FFT_WIN_TYP_BLACKMAN, FFT_FORWARD );       // Blackman window - better side freq rejection
+        #if !defined(FFT_PREFER_EXACT_PEAKS)
+          FFT.Windowing( FFT_WIN_TYP_FLT_TOP, FFT_FORWARD );        // Flat Top Window - better amplitude accuracy
+        #else
+          FFT.Windowing( FFT_WIN_TYP_BLACKMAN_HARRIS, FFT_FORWARD );// Blackman-Harris - excellent sideband rejection
+        #endif
+        FFT.Compute( FFT_FORWARD );                             // Compute FFT
+        FFT.ComplexToMagnitude();                               // Compute magnitudes
+        #endif

-      //FFT.Windowing( FFT_WIN_TYP_HAMMING, FFT_FORWARD );        // Weigh data - standard Hamming window
-      //FFT.Windowing( FFT_WIN_TYP_BLACKMAN, FFT_FORWARD );       // Blackman window - better side freq rejection
-      #if !defined(FFT_PREFER_EXACT_PEAKS)
-        FFT.Windowing( FFT_WIN_TYP_FLT_TOP, FFT_FORWARD );        // Flat Top Window - better amplitude accuracy
-      #else
-        FFT.Windowing( FFT_WIN_TYP_BLACKMAN_HARRIS, FFT_FORWARD );// Blackman-Harris - excellent sideband rejection
-      #endif
-      FFT.Compute( FFT_FORWARD );                             // Compute FFT
-      FFT.ComplexToMagnitude();                               // Compute magnitudes
-#endif
-
-#ifdef UM_AUDIOREACTIVE_USE_NEW_FFT
-      FFT.majorPeak(FFT_MajorPeak, FFT_Magnitude);                // let the effects know which freq was most dominant
-#else
-      FFT.MajorPeak(&FFT_MajorPeak, &FFT_Magnitude);              // let the effects know which freq was most dominant
-#endif
-      FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f);   // restrict value to range expected by effects
+        #ifdef UM_AUDIOREACTIVE_USE_NEW_FFT
+          FFT.majorPeak(FFT_MajorPeak, FFT_Magnitude);                // let the effects know which freq was most dominant
+        #else
+        FFT.MajorPeak(&FFT_MajorPeak, &FFT_Magnitude);              // let the effects know which freq was most dominant
+        #endif
+        FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f);   // restrict value to range expected by effects

+      } else { // skip second run --> clear fft results, keep peaks
+        memset(vReal, 0, sizeof(vReal)); 
+      }
 #if defined(WLED_DEBUG) || defined(SR_DEBUG) || defined(SR_STATS)
      haveDoneFFT = true;
 #endif
@@ -441,14 +475,16 @@ void FFTcode(void * parameter)
      FFT_Magnitude = 0.001;
    }

-    for (int i = 0; i < samplesFFT; i++) {
-      float t = fabsf(vReal[i]);                      // just to be sure - values in fft bins should be positive any way
-      vReal[i] = t / 16.0f;                           // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
-    } // for()
+    if ((skipSecondFFT == false) || (isFirstRun == true)) {

-    // mapping of FFT result bins to frequency channels
-    //if (fabsf(sampleAvg) > 0.25f) { // noise gate open
-    if (fabsf(volumeSmth) > 0.25f) { // noise gate open
+      for (int i = 0; i < samplesFFT; i++) {
+        float t = fabsf(vReal[i]);                      // just to be sure - values in fft bins should be positive any way
+        vReal[i] = t / 16.0f;                           // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
+      } // for()
+
+      // mapping of FFT result bins to frequency channels
+      //if (fabsf(sampleAvg) > 0.25f) { // noise gate open
+      if (fabsf(volumeSmth) > 0.25f) { // noise gate open
 #if 0
    /* This FFT post processing is a DIY endeavour. What we really need is someone with sound engineering expertise to do a great job here AND most importantly, that the animations look GREAT as a result.
    *
@@ -506,24 +542,34 @@ void FFTcode(void * parameter)
      fftCalc[13] = fftAddAvg(86,104);              // 18 3704 - 4479 high mid
      fftCalc[14] = fftAddAvg(104,165) * 0.88f;     // 61 4479 - 7106 high mid + high  -- with slight damping
 #endif
-    } else {  // noise gate closed - just decay old values
-      for (int i=0; i < NUM_GEQ_CHANNELS; i++) {
-        fftCalc[i] *= 0.85f;  // decay to zero
-        if (fftCalc[i] < 4.0f) fftCalc[i] = 0.0f;
+      } else {  // noise gate closed - just decay old values
+        isFirstRun = false;
+        for (int i=0; i < NUM_GEQ_CHANNELS; i++) {
+          fftCalc[i] *= 0.85f;  // decay to zero
+          if (fftCalc[i] < 4.0f) fftCalc[i] = 0.0f;
+        }
      }
+
+      memcpy(lastFftCalc, fftCalc, sizeof(lastFftCalc)); // make a backup of last "good" channels
+
+    } else { // if second run skipped
+      memcpy(fftCalc, lastFftCalc, sizeof(fftCalc)); // restore last "good" channels
    }

    // post-processing of frequency channels (pink noise adjustment, AGC, smooting, scaling)
    if (pinkIndex > MAX_PINK) pinkIndex = MAX_PINK;
    //postProcessFFTResults((fabsf(sampleAvg) > 0.25f)? true : false , NUM_GEQ_CHANNELS);
-    postProcessFFTResults((fabsf(volumeSmth)>0.25f)? true : false , NUM_GEQ_CHANNELS);
+    postProcessFFTResults((fabsf(volumeSmth)>0.25f)? true : false , NUM_GEQ_CHANNELS);    // this function modifies fftCalc, fftAvg and fftResult

 #if defined(WLED_DEBUG) || defined(SR_DEBUG)|| defined(SR_STATS)
+    static uint64_t lastLastFFT = 0;
    if (haveDoneFFT && (start < esp_timer_get_time())) { // filter out overflows
      uint64_t fftTimeInMillis = ((esp_timer_get_time() - start) +5ULL) / 10ULL; // "+5" to ensure proper rounding
-      fftTime  = (fftTimeInMillis*3 + fftTime*7)/10; // smooth
+      fftTime  = (((fftTimeInMillis + lastLastFFT)/2) *3 + fftTime*7)/10; // smart smooth
+      lastLastFFT = fftTimeInMillis;
    }
 #endif
+
    // run peak detection
    autoResetPeak();
    detectSamplePeak();
@@ -531,8 +577,13 @@ void FFTcode(void * parameter)
    #if !defined(I2S_GRAB_ADC1_COMPLETELY)    
    if ((audioSource == nullptr) || (audioSource->getType() != AudioSource::Type_I2SAdc))  // the "delay trick" does not help for analog ADC
    #endif
-      vTaskDelayUntil( &xLastWakeTime, xFrequency);        // release CPU, and let I2S fill its buffers
-
+    {
+      if ((skipSecondFFT == false) || (fabsf(volumeSmth) < 0.25f)) {
+        vTaskDelayUntil( &xLastWakeTime, xFrequency);        // release CPU, and let I2S fill its buffers
+      } else if (isFirstRun == true) {
+        vTaskDelayUntil( &xLastWakeTime, xFrequencyDouble);  // release CPU after performing FFT in "skip second run" mode
+      }
+    }
  } // for(;;)ever
 } // FFTcode() task end

@@ -1774,6 +1825,10 @@ class AudioReactive : public Usermod {
        }

        #if defined(WLED_DEBUG) || defined(SR_DEBUG) || defined(SR_STATS)
+        infoArr = user.createNestedArray(F("I2S cycle time"));
+        infoArr.add(float(fftTaskCycle)/100.0f);
+        infoArr.add(" ms");
+
        infoArr = user.createNestedArray(F("Sampling time"));
        infoArr.add(float(sampleTime)/100.0f);
        infoArr.add(" ms");
@@ -1787,8 +1842,9 @@ class AudioReactive : public Usermod {
        else
          infoArr.add(" ms");

-        DEBUGSR_PRINTF("AR Sampling time: %5.2f ms\n", float(sampleTime)/100.0f);
-        DEBUGSR_PRINTF("AR FFT time     : %5.2f ms\n", float(fftTime)/100.0f);
+        DEBUGSR_PRINTF("AR I2S cycle time: %5.2f ms\n", float(fftTaskCycle)/100.0f);
+        DEBUGSR_PRINTF("AR Sampling time : %5.2f ms\n", float(sampleTime)/100.0f);
+        DEBUGSR_PRINTF("AR FFT time      : %5.2f ms\n", float(fftTime)/100.0f);
        #endif
      }
    }
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -23,7 +23,7 @@

 // see https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/hw-reference/chip-series-comparison.html#related-documents
 // and https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/api-reference/peripherals/i2s.html#overview-of-all-modes
-#if defined(CONFIG_IDF_TARGET_ESP32C2) || defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C5) || defined(CONFIG_IDF_TARGET_ESP32C6) || defined(CONFIG_IDF_TARGET_ESP32H2) || defined(ESP8266) || defined(ESP8265)
+#if defined(CONFIG_IDF_TARGET_ESP32C2) || defined(CONFIG_IDF_TARGET_ESP32C5) || defined(CONFIG_IDF_TARGET_ESP32C6) || defined(CONFIG_IDF_TARGET_ESP32H2) || defined(ESP8266) || defined(ESP8265)
  // there are two things in these MCUs that could lead to problems with audio processing:
  // * no floating point hardware (FPU) support - FFT uses float calculations. If done in software, a strong slow-down can be expected (between 8x and 20x)
  // * single core, so FFT task might slow down other things like LED updates