From e98858c751a9e29d59f6a1f46bd94acb2f76749b Mon Sep 17 00:00:00 2001
From: Frank <91616163+softhack007@users.noreply.github.com>
Date: Mon, 1 Jul 2024 16:29:09 +0200
Subject: [PATCH] audio FASTPATH, part 2
* introducing sliding window FFT, which effectively doubles the rate of samples - and FFT results - produced per second. As a side-effect, it also makes FFT a bit less noisy.
As sliding window FFT requires double the number of FFT runs, the feature is only enabled on esp32 and esp32-S3.
---
usermods/audioreactive/audio_reactive.h | 107 ++++++++++++++++++++++--
1 file changed, 98 insertions(+), 9 deletions(-)
diff --git a/usermods/audioreactive/audio_reactive.h b/usermods/audioreactive/audio_reactive.h
index d9247f37..99e80f04 100644
--- a/usermods/audioreactive/audio_reactive.h
+++ b/usermods/audioreactive/audio_reactive.h
@@ -41,6 +41,12 @@
* ....
*/
+
+#if defined(WLEDMM_FASTPATH) && (defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32)) // FASTPATH builds only; parentheses are required because && binds tighter than ||
+#define FFT_USE_SLIDING_WINDOW // perform FFT with sliding window = 50% overlap; doubles the number of FFT runs, so only enabled on esp32 and esp32-S3
+#endif
+
+
#define FFT_PREFER_EXACT_PEAKS // use different FFT windowing -> results in "sharper" peaks and less "leaking" into other frequencies
//#define SR_STATS
@@ -172,8 +178,13 @@ static bool limiterOn = false; // bool: enable / disable dynamic
#else
static bool limiterOn = true;
#endif
+#ifdef FFT_USE_SLIDING_WINDOW
+static uint16_t attackTime = 14; // int: attack time in milliseconds. Default 0.014sec
+static uint16_t decayTime = 250; // int: decay time in milliseconds. New default 250ms.
+#else
static uint16_t attackTime = 50; // int: attack time in milliseconds. Default 0.08sec
static uint16_t decayTime = 300; // int: decay time in milliseconds. New default 300ms. Old default was 1.40sec
+#endif
// peak detection
#ifdef ARDUINO_ARCH_ESP32
@@ -242,7 +253,9 @@ static constexpr uint8_t averageByRMS = false; // false: us
static constexpr uint8_t averageByRMS = true; // false: use mean value, true: use RMS (root mean squared). use better method on fast MCUs.
#endif
static uint8_t freqDist = 0; // 0=old 1=rightshift mode
-
+#ifdef FFT_USE_SLIDING_WINDOW
+static uint8_t doSlidingFFT = 1; // 1 = use sliding window FFT (faster & more accurate)
+#endif
// variables used in effects
//static int16_t volumeRaw = 0; // either sampleRaw or rawSampleAgc depending on soundAgc
@@ -345,7 +358,11 @@ constexpr SRate_t SAMPLE_RATE = 22050; // Base sample rate in Hz - 22Khz
#ifndef WLEDMM_FASTPATH
#define FFT_MIN_CYCLE 21 // minimum time before FFT task is repeated. Use with 22Khz sampling
#else
-#define FFT_MIN_CYCLE 15 // reduce min time, to allow faster catch-up when I2S is lagging
+ #ifdef FFT_USE_SLIDING_WINDOW
+ #define FFT_MIN_CYCLE 8 // we only have 12ms to take 1/2 batch of samples
+ #else
+ #define FFT_MIN_CYCLE 15 // reduce min time, to allow faster catch-up when I2S is lagging
+ #endif
#endif
//#define FFT_MIN_CYCLE 30 // Use with 16Khz sampling
//#define FFT_MIN_CYCLE 23 // minimum time before FFT task is repeated. Use with 20Khz sampling
@@ -363,7 +380,7 @@ constexpr SRate_t SAMPLE_RATE = 18000; // 18Khz; Physical sample time -
// FFT Constants
constexpr uint16_t samplesFFT = 512; // Samples in an FFT batch - This value MUST ALWAYS be a power of 2
-constexpr uint16_t samplesFFT_2 = 256; // meaningfull part of FFT results - only the "lower half" contains useful information.
+constexpr uint16_t samplesFFT_2 = 256; // meaningful part of FFT results - only the "lower half" contains useful information.
// the following are observed values, supported by a bit of "educated guessing"
//#define FFT_DOWNSCALE 0.65f // 20kHz - downscaling factor for FFT results - "Flat-Top" window @20Khz, old freq channels
//#define FFT_DOWNSCALE 0.46f // downscaling factor for FFT results - for "Flat-Top" window @22Khz, new freq channels
@@ -473,6 +490,12 @@ void FFTcode(void * parameter)
const TickType_t xFrequencyDouble = FFT_MIN_CYCLE * portTICK_PERIOD_MS * 2;
static bool isFirstRun = false;
+#ifdef FFT_USE_SLIDING_WINDOW
+ static float oldSamples[samplesFFT_2] = {0.0f}; // previous 50% of samples
+ static bool haveOldSamples = false; // for sliding window FFT
+ bool usingOldSamples = false;
+#endif
+
#ifdef FFT_MAJORPEAK_HUMAN_EAR
// pre-compute pink noise scaling table
for(uint_fast16_t binInd = 0; binInd < samplesFFT; binInd++) {
@@ -492,6 +515,9 @@ void FFTcode(void * parameter)
// Don't run FFT computing code if we're in Receive mode or in realtime mode
if (disableSoundProcessing || (audioSyncEnabled == AUDIOSYNC_REC)) {
isFirstRun = false;
+ #ifdef FFT_USE_SLIDING_WINDOW
+ haveOldSamples = false;
+ #endif
vTaskDelayUntil( &xLastWakeTime, xFrequency); // release CPU, and let I2S fill its buffers
continue;
}
@@ -511,7 +537,26 @@ void FFTcode(void * parameter)
#endif
// get a fresh batch of samples from I2S
+ memset(vReal, 0, sizeof(vReal)); // start clean
+#ifdef FFT_USE_SLIDING_WINDOW
+ uint16_t readOffset;
+ if (haveOldSamples && (doSlidingFFT > 0)) {
+ memcpy(vReal, oldSamples, sizeof(float) * samplesFFT_2); // copy first 50% from buffer
+ usingOldSamples = true;
+ readOffset = samplesFFT_2;
+ } else {
+ usingOldSamples = false;
+ readOffset = 0;
+ }
+ // read fresh samples, in chunks of 50%
+ do {
+ // this looks a bit cumbersome, but it only works this way - any second instance of the getSamples() call delivers junk data.
+ if (audioSource) audioSource->getSamples(vReal+readOffset, samplesFFT_2);
+ readOffset += samplesFFT_2;
+ } while (readOffset < samplesFFT);
+#else
if (audioSource) audioSource->getSamples(vReal, samplesFFT);
+#endif
#if defined(WLED_DEBUG) || defined(SR_DEBUG)|| defined(SR_STATS)
// debug info in case that stack usage changes
@@ -552,13 +597,23 @@ void FFTcode(void * parameter)
if (strip.isServicing()) delay(2);
#endif
+ // normal mode: filter everything
+ float *samplesStart = vReal;
+ uint16_t sampleCount = samplesFFT;
+ #ifdef FFT_USE_SLIDING_WINDOW
+ if (usingOldSamples) {
+ // sliding window mode: only latest 50% need filtering
+ samplesStart = vReal + samplesFFT_2;
+ sampleCount = samplesFFT_2;
+ }
+ #endif
// band pass filter - can reduce noise floor by a factor of 50
// downside: frequencies below 100Hz will be ignored
bool doDCRemoval = false; // DCRemove is only necessary if we don't use any kind of low-cut filtering
if ((useInputFilter > 0) && (useInputFilter < 99)) {
switch(useInputFilter) {
- case 1: runMicFilter(samplesFFT, vReal); break; // PDM microphone bandpass
- case 2: runDCBlocker(samplesFFT, vReal); break; // generic Low-Cut + DC blocker (~40hz cut-off)
+ case 1: runMicFilter(sampleCount, samplesStart); break; // PDM microphone bandpass
+ case 2: runDCBlocker(sampleCount, samplesStart); break; // generic Low-Cut + DC blocker (~40hz cut-off)
default: doDCRemoval = true; break;
}
} else doDCRemoval = true;
@@ -575,14 +630,24 @@ void FFTcode(void * parameter)
// set imaginary parts to 0
memset(vImag, 0, sizeof(vImag));
+ #ifdef FFT_USE_SLIDING_WINDOW
+ memcpy(oldSamples, vReal+samplesFFT_2, sizeof(float) * samplesFFT_2); // copy last 50% to buffer (for sliding window FFT)
+ haveOldSamples = true;
+ #endif
+
// find highest sample in the batch, and count zero crossings
float maxSample = 0.0f; // max sample from FFT batch
uint_fast16_t newZeroCrossingCount = 0;
for (int i=0; i < samplesFFT; i++) {
// pick our our current mic sample - we take the max value from all samples that go into FFT
- if ((vReal[i] <= (INT16_MAX - 1024)) && (vReal[i] >= (INT16_MIN + 1024))) //skip extreme values - normally these are artefacts
+ if ((vReal[i] <= (INT16_MAX - 1024)) && (vReal[i] >= (INT16_MIN + 1024))) { //skip extreme values - normally these are artefacts
+ #ifdef FFT_USE_SLIDING_WINDOW
+ if (usingOldSamples) {
+ if ((i >= samplesFFT_2) && (fabsf(vReal[i]) > maxSample)) maxSample = fabsf(vReal[i]); // only look at newest 50%
+ } else
+ #endif
if (fabsf((float)vReal[i]) > maxSample) maxSample = fabsf((float)vReal[i]);
-
+ }
// WLED-MM/TroyHacks: Calculate zero crossings
//
if (i < (samplesFFT-1)) {
@@ -812,6 +877,11 @@ void FFTcode(void * parameter)
if ((audioSource == nullptr) || (audioSource->getType() != AudioSource::Type_I2SAdc)) // the "delay trick" does not help for analog ADC
#endif
{
+ #ifdef FFT_USE_SLIDING_WINDOW
+ if (!usingOldSamples) {
+ vTaskDelayUntil( &xLastWakeTime, xFrequencyDouble); // we need a double wait when no old data was used
+ } else
+ #endif
if ((skipSecondFFT == false) || (fabsf(volumeSmth) < 0.25f)) {
vTaskDelayUntil( &xLastWakeTime, xFrequency); // release CPU, and let I2S fill its buffers
} else if (isFirstRun == true) {
@@ -2523,9 +2593,15 @@ class AudioReactive : public Usermod {
infoArr = user.createNestedArray(F("FFT time"));
infoArr.add(roundf(fftTime)/100.0f);
- if ((fftTime/100) >= FFT_MIN_CYCLE) // FFT time over budget -> I2S buffer will overflow
+
+#ifdef FFT_USE_SLIDING_WINDOW
+ unsigned timeBudget = doSlidingFFT ? (FFT_MIN_CYCLE) : fftTaskCycle / 115;
+#else
+ unsigned timeBudget = (FFT_MIN_CYCLE);
+#endif
+ if ((fftTime/100) >= timeBudget) // FFT time over budget -> I2S buffer will overflow
infoArr.add("! ms");
- else if ((fftTime/85 + filterTime/85 + sampleTime/85) >= FFT_MIN_CYCLE) // FFT time >75% of budget -> risk of instability
+ else if ((fftTime/85 + filterTime/85 + sampleTime/85) >= timeBudget) // FFT time >75% of budget -> risk of instability
infoArr.add(" ms!");
else
infoArr.add(" ms");
@@ -2649,6 +2725,9 @@ class AudioReactive : public Usermod {
poweruser[F("freqDist")] = freqDist;
//poweruser[F("freqRMS")] = averageByRMS;
+#ifdef FFT_USE_SLIDING_WINDOW
+ poweruser[F("I2S_FastPath")] = doSlidingFFT;
+#endif
JsonObject freqScale = top.createNestedObject("frequency");
freqScale[F("scale")] = FFTScalingMode;
freqScale[F("profile")] = pinkIndex; //WLEDMM
@@ -2720,6 +2799,9 @@ class AudioReactive : public Usermod {
configComplete &= getJsonValue(top["experiments"][F("micLev")], micLevelMethod);
configComplete &= getJsonValue(top["experiments"][F("freqDist")], freqDist);
//configComplete &= getJsonValue(top["experiments"][F("freqRMS")], averageByRMS);
+#ifdef FFT_USE_SLIDING_WINDOW
+ configComplete &= getJsonValue(top["experiments"][F("I2S_FastPath")], doSlidingFFT);
+#endif
configComplete &= getJsonValue(top["frequency"][F("scale")], FFTScalingMode);
configComplete &= getJsonValue(top["frequency"][F("profile")], pinkIndex); //WLEDMM
@@ -2829,6 +2911,13 @@ class AudioReactive : public Usermod {
//oappend(SET_F("addOption(dd,'On',1);"));
//oappend(SET_F("addInfo('AudioReactive:experiments:freqRMS',1,'☾');"));
+#ifdef FFT_USE_SLIDING_WINDOW
+ oappend(SET_F("dd=addDropdown(ux,'experiments:I2S_FastPath');"));
+ oappend(SET_F("addOption(dd,'Off',0);"));
+ oappend(SET_F("addOption(dd,'On (⎌)',1);"));
+ oappend(SET_F("addInfo(ux+':experiments:I2S_FastPath',1,'☾');"));
+#endif
+
oappend(SET_F("dd=addDropdown('AudioReactive','dynamics:limiter');"));
oappend(SET_F("addOption(dd,'Off',0);"));
oappend(SET_F("addOption(dd,'On',1);"));