audio FASTPATH, part 2
* introducing sliding window FFT, which effectively doubles the rate of samples - and FFT results - produced per second. As a side-effect, it also makes FFT a bit less noisy. As sliding window FFT requires double the number of FFT runs, the feature is only enabled on esp32 and esp32-S3.
This commit is contained in:
@@ -41,6 +41,12 @@
|
||||
* ....
|
||||
*/
|
||||
|
||||
|
||||
// Sliding window FFT is part of the audio FASTPATH and needs twice as many FFT runs,
// so it is only enabled for FASTPATH builds on esp32 and esp32-S3.
// The parentheses are required: '&&' binds tighter than '||', so without them every
// classic esp32 build would enable the feature even when WLEDMM_FASTPATH is not set.
#if defined(WLEDMM_FASTPATH) && (defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32))
#define FFT_USE_SLIDING_WINDOW                // perform FFT with sliding window = 50% overlap
#endif
|
||||
|
||||
|
||||
#define FFT_PREFER_EXACT_PEAKS // use different FFT windowing -> results in "sharper" peaks and less "leaking" into other frequencies
|
||||
//#define SR_STATS
|
||||
|
||||
@@ -172,8 +178,13 @@ static bool limiterOn = false; // bool: enable / disable dynamic
|
||||
#else
|
||||
static bool limiterOn = true;
|
||||
#endif
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
static uint16_t attackTime = 14; // int: attack time in milliseconds. Default 0.014sec
|
||||
static uint16_t decayTime = 250; // int: decay time in milliseconds. New default 250ms.
|
||||
#else
|
||||
static uint16_t attackTime = 50; // int: attack time in milliseconds. Default 0.05sec (50ms) when sliding window FFT is not compiled in
|
||||
static uint16_t decayTime = 300; // int: decay time in milliseconds. New default 300ms. Old default was 1.40sec
|
||||
#endif
|
||||
|
||||
// peak detection
|
||||
#ifdef ARDUINO_ARCH_ESP32
|
||||
@@ -242,7 +253,9 @@ static constexpr uint8_t averageByRMS = false; // false: us
|
||||
static constexpr uint8_t averageByRMS = true; // false: use mean value, true: use RMS (root mean squared). use better method on fast MCUs.
|
||||
#endif
|
||||
static uint8_t freqDist = 0; // 0=old 1=rightshift mode
|
||||
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
static uint8_t doSlidingFFT = 1; // 0 = off (each FFT reads a full batch of fresh samples), 1 = use sliding window FFT (faster & more accurate). Read from config key "experiments" -> "I2S_FastPath"
|
||||
#endif
|
||||
|
||||
// variables used in effects
|
||||
//static int16_t volumeRaw = 0; // either sampleRaw or rawSampleAgc depending on soundAgc
|
||||
@@ -345,7 +358,11 @@ constexpr SRate_t SAMPLE_RATE = 22050; // Base sample rate in Hz - 22Khz
|
||||
// FFT_MIN_CYCLE: minimum time (ms) before the FFT task is repeated.
// Exactly one definition is selected; defining the macro twice with different values
// in the same branch would be an incompatible redefinition.
#ifndef WLEDMM_FASTPATH
#define FFT_MIN_CYCLE 21                // minimum time before FFT task is repeated. Use with 22Khz sampling
#else
  #ifdef FFT_USE_SLIDING_WINDOW
  #define FFT_MIN_CYCLE 8               // we only have 12ms to take 1/2 batch of samples
  #else
  #define FFT_MIN_CYCLE 15              // reduce min time, to allow faster catch-up when I2S is lagging
  #endif
#endif
|
||||
//#define FFT_MIN_CYCLE 30 // Use with 16Khz sampling
|
||||
//#define FFT_MIN_CYCLE 23 // minimum time before FFT task is repeated. Use with 20Khz sampling
|
||||
@@ -363,7 +380,7 @@ constexpr SRate_t SAMPLE_RATE = 18000; // 18Khz; Physical sample time -
|
||||
|
||||
// FFT Constants
|
||||
constexpr uint16_t samplesFFT = 512;            // Samples in an FFT batch - This value MUST ALWAYS be a power of 2
constexpr uint16_t samplesFFT_2 = 256;          // meaningful part of FFT results - only the "lower half" contains useful information.
// compile-time guards: the sliding window code copies and reads samples in chunks of samplesFFT_2,
// so samplesFFT_2 must be exactly half of samplesFFT
static_assert((samplesFFT & (samplesFFT - 1)) == 0, "samplesFFT must be a power of 2");
static_assert(samplesFFT_2 * 2 == samplesFFT, "samplesFFT_2 must be half of samplesFFT");
|
||||
// the following are observed values, supported by a bit of "educated guessing"
|
||||
//#define FFT_DOWNSCALE 0.65f // 20kHz - downscaling factor for FFT results - "Flat-Top" window @20Khz, old freq channels
|
||||
//#define FFT_DOWNSCALE 0.46f // downscaling factor for FFT results - for "Flat-Top" window @22Khz, new freq channels
|
||||
@@ -473,6 +490,12 @@ void FFTcode(void * parameter)
|
||||
const TickType_t xFrequencyDouble = FFT_MIN_CYCLE * portTICK_PERIOD_MS * 2;
|
||||
static bool isFirstRun = false;
|
||||
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
static float oldSamples[samplesFFT_2] = {0.0f}; // previous 50% of samples
|
||||
static bool haveOldSamples = false; // for sliding window FFT
|
||||
bool usingOldSamples = false;
|
||||
#endif
|
||||
|
||||
#ifdef FFT_MAJORPEAK_HUMAN_EAR
|
||||
// pre-compute pink noise scaling table
|
||||
for(uint_fast16_t binInd = 0; binInd < samplesFFT; binInd++) {
|
||||
@@ -492,6 +515,9 @@ void FFTcode(void * parameter)
|
||||
// Don't run FFT computing code if we're in Receive mode or in realtime mode
|
||||
if (disableSoundProcessing || (audioSyncEnabled == AUDIOSYNC_REC)) {
|
||||
isFirstRun = false;
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
haveOldSamples = false;
|
||||
#endif
|
||||
vTaskDelayUntil( &xLastWakeTime, xFrequency); // release CPU, and let I2S fill its buffers
|
||||
continue;
|
||||
}
|
||||
@@ -511,7 +537,26 @@ void FFTcode(void * parameter)
|
||||
#endif
|
||||
|
||||
// get a fresh batch of samples from I2S
|
||||
memset(vReal, 0, sizeof(vReal)); // start clean
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
uint16_t readOffset;
|
||||
if (haveOldSamples && (doSlidingFFT > 0)) {
|
||||
memcpy(vReal, oldSamples, sizeof(float) * samplesFFT_2); // copy first 50% from buffer
|
||||
usingOldSamples = true;
|
||||
readOffset = samplesFFT_2;
|
||||
} else {
|
||||
usingOldSamples = false;
|
||||
readOffset = 0;
|
||||
}
|
||||
// read fresh samples, in chunks of 50%
|
||||
do {
|
||||
// this looks a bit cumbersome, but it only works this way - any second instance of the getSamples() call delivers junk data.
|
||||
if (audioSource) audioSource->getSamples(vReal+readOffset, samplesFFT_2);
|
||||
readOffset += samplesFFT_2;
|
||||
} while (readOffset < samplesFFT);
|
||||
#else
|
||||
if (audioSource) audioSource->getSamples(vReal, samplesFFT);
|
||||
#endif
|
||||
|
||||
#if defined(WLED_DEBUG) || defined(SR_DEBUG)|| defined(SR_STATS)
|
||||
// debug info in case that stack usage changes
|
||||
@@ -552,13 +597,23 @@ void FFTcode(void * parameter)
|
||||
if (strip.isServicing()) delay(2);
|
||||
#endif
|
||||
|
||||
// normal mode: filter everything
|
||||
float *samplesStart = vReal;
|
||||
uint16_t sampleCount = samplesFFT;
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
if (usingOldSamples) {
|
||||
// sliding window mode: only latest 50% need filtering
|
||||
samplesStart = vReal + samplesFFT_2;
|
||||
sampleCount = samplesFFT_2;
|
||||
}
|
||||
#endif
|
||||
// band pass filter - can reduce noise floor by a factor of 50
|
||||
// downside: frequencies below 100Hz will be ignored
|
||||
bool doDCRemoval = false; // DCRemove is only necessary if we don't use any kind of low-cut filtering
|
||||
if ((useInputFilter > 0) && (useInputFilter < 99)) {
|
||||
switch(useInputFilter) {
|
||||
case 1: runMicFilter(samplesFFT, vReal); break; // PDM microphone bandpass
|
||||
case 2: runDCBlocker(samplesFFT, vReal); break; // generic Low-Cut + DC blocker (~40hz cut-off)
|
||||
case 1: runMicFilter(sampleCount, samplesStart); break; // PDM microphone bandpass
|
||||
case 2: runDCBlocker(sampleCount, samplesStart); break; // generic Low-Cut + DC blocker (~40hz cut-off)
|
||||
default: doDCRemoval = true; break;
|
||||
}
|
||||
} else doDCRemoval = true;
|
||||
@@ -575,14 +630,24 @@ void FFTcode(void * parameter)
|
||||
// set imaginary parts to 0
|
||||
memset(vImag, 0, sizeof(vImag));
|
||||
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
memcpy(oldSamples, vReal+samplesFFT_2, sizeof(float) * samplesFFT_2); // copy last 50% to buffer (for sliding window FFT)
|
||||
haveOldSamples = true;
|
||||
#endif
|
||||
|
||||
// find highest sample in the batch, and count zero crossings
|
||||
float maxSample = 0.0f; // max sample from FFT batch
|
||||
uint_fast16_t newZeroCrossingCount = 0;
|
||||
for (int i=0; i < samplesFFT; i++) {
|
||||
// pick out our current mic sample - we take the max value from all samples that go into FFT
|
||||
if ((vReal[i] <= (INT16_MAX - 1024)) && (vReal[i] >= (INT16_MIN + 1024))) //skip extreme values - normally these are artefacts
|
||||
if ((vReal[i] <= (INT16_MAX - 1024)) && (vReal[i] >= (INT16_MIN + 1024))) { //skip extreme values - normally these are artefacts
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
if (usingOldSamples) {
|
||||
if ((i >= samplesFFT_2) && (fabsf(vReal[i]) > maxSample)) maxSample = fabsf(vReal[i]); // only look at newest 50%
|
||||
} else
|
||||
#endif
|
||||
if (fabsf((float)vReal[i]) > maxSample) maxSample = fabsf((float)vReal[i]);
|
||||
|
||||
}
|
||||
// WLED-MM/TroyHacks: Calculate zero crossings
|
||||
//
|
||||
if (i < (samplesFFT-1)) {
|
||||
@@ -812,6 +877,11 @@ void FFTcode(void * parameter)
|
||||
if ((audioSource == nullptr) || (audioSource->getType() != AudioSource::Type_I2SAdc)) // the "delay trick" does not help for analog ADC
|
||||
#endif
|
||||
{
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
if (!usingOldSamples) {
|
||||
vTaskDelayUntil( &xLastWakeTime, xFrequencyDouble); // we need a double wait when no old data was used
|
||||
} else
|
||||
#endif
|
||||
if ((skipSecondFFT == false) || (fabsf(volumeSmth) < 0.25f)) {
|
||||
vTaskDelayUntil( &xLastWakeTime, xFrequency); // release CPU, and let I2S fill its buffers
|
||||
} else if (isFirstRun == true) {
|
||||
@@ -2523,9 +2593,15 @@ class AudioReactive : public Usermod {
|
||||
|
||||
infoArr = user.createNestedArray(F("FFT time"));
|
||||
infoArr.add(roundf(fftTime)/100.0f);
|
||||
if ((fftTime/100) >= FFT_MIN_CYCLE) // FFT time over budget -> I2S buffer will overflow
|
||||
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
unsigned timeBudget = doSlidingFFT ? (FFT_MIN_CYCLE) : fftTaskCycle / 115;
|
||||
#else
|
||||
unsigned timeBudget = (FFT_MIN_CYCLE);
|
||||
#endif
|
||||
if ((fftTime/100) >= timeBudget) // FFT time over budget -> I2S buffer will overflow
|
||||
infoArr.add("<b style=\"color:red;\">! ms</b>");
|
||||
else if ((fftTime/85 + filterTime/85 + sampleTime/85) >= FFT_MIN_CYCLE) // FFT time >75% of budget -> risk of instability
|
||||
else if ((fftTime/85 + filterTime/85 + sampleTime/85) >= timeBudget) // FFT time >75% of budget -> risk of instability
|
||||
infoArr.add("<b style=\"color:orange;\"> ms!</b>");
|
||||
else
|
||||
infoArr.add(" ms");
|
||||
@@ -2649,6 +2725,9 @@ class AudioReactive : public Usermod {
|
||||
poweruser[F("freqDist")] = freqDist;
|
||||
//poweruser[F("freqRMS")] = averageByRMS;
|
||||
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
poweruser[F("I2S_FastPath")] = doSlidingFFT;
|
||||
#endif
|
||||
JsonObject freqScale = top.createNestedObject("frequency");
|
||||
freqScale[F("scale")] = FFTScalingMode;
|
||||
freqScale[F("profile")] = pinkIndex; //WLEDMM
|
||||
@@ -2720,6 +2799,9 @@ class AudioReactive : public Usermod {
|
||||
configComplete &= getJsonValue(top["experiments"][F("micLev")], micLevelMethod);
|
||||
configComplete &= getJsonValue(top["experiments"][F("freqDist")], freqDist);
|
||||
//configComplete &= getJsonValue(top["experiments"][F("freqRMS")], averageByRMS);
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
configComplete &= getJsonValue(top["experiments"][F("I2S_FastPath")], doSlidingFFT);
|
||||
#endif
|
||||
|
||||
configComplete &= getJsonValue(top["frequency"][F("scale")], FFTScalingMode);
|
||||
configComplete &= getJsonValue(top["frequency"][F("profile")], pinkIndex); //WLEDMM
|
||||
@@ -2829,6 +2911,13 @@ class AudioReactive : public Usermod {
|
||||
//oappend(SET_F("addOption(dd,'On',1);"));
|
||||
//oappend(SET_F("addInfo('AudioReactive:experiments:freqRMS',1,'☾');"));
|
||||
|
||||
#ifdef FFT_USE_SLIDING_WINDOW
|
||||
oappend(SET_F("dd=addDropdown(ux,'experiments:I2S_FastPath');"));
|
||||
oappend(SET_F("addOption(dd,'Off',0);"));
|
||||
oappend(SET_F("addOption(dd,'On (⎌)',1);"));
|
||||
oappend(SET_F("addInfo(ux+':experiments:I2S_FastPath',1,'☾');"));
|
||||
#endif
|
||||
|
||||
oappend(SET_F("dd=addDropdown('AudioReactive','dynamics:limiter');"));
|
||||
oappend(SET_F("addOption(dd,'Off',0);"));
|
||||
oappend(SET_F("addOption(dd,'On',1);"));
|
||||
|
||||
Reference in New Issue
Block a user