effect math speedup - up to 3x faster

-> distortion waves 3x speedup
-> hiphotic 2x speedup
-> waving cell 1.5x speedup

* replaced sin8_t with a lookup table of pre-computed values
* moved integer sin and cos to fcn_declare.h (inlined by the compiler)
* moved gamma32 to fcn_declare.h (inlined by the compiler)
* a few other small tweaks
This commit is contained in:
Frank
2025-12-17 21:59:22 +01:00
parent 49e6de33c7
commit bc7cf062e8
7 changed files with 107 additions and 25 deletions

View File

@@ -5883,8 +5883,8 @@ static const char _data_FX_MODE_2DSNOWFALL[] PROGMEM = "Snow Fall ☾@!,Spawn Ra
uint16_t mode_2DHiphotic() { // By: ldirko https://editor.soulmatelights.com/gallery/810 , Modified by: Andrew Tuline uint16_t mode_2DHiphotic() { // By: ldirko https://editor.soulmatelights.com/gallery/810 , Modified by: Andrew Tuline
if (!strip.isMatrix) return mode_oops(); // not a 2D set-up if (!strip.isMatrix) return mode_oops(); // not a 2D set-up
const uint16_t cols = SEGMENT.virtualWidth(); const uint_fast16_t cols = SEGMENT.virtualWidth();
const uint16_t rows = SEGMENT.virtualHeight(); const uint_fast16_t rows = SEGMENT.virtualHeight();
const uint32_t a = strip.now / ((SEGMENT.custom3>>1)+1); const uint32_t a = strip.now / ((SEGMENT.custom3>>1)+1);
for (int x = 0; x < cols; x++) { for (int x = 0; x < cols; x++) {
@@ -8654,10 +8654,10 @@ static const char _data_FX_MODE_2DAKEMI[] PROGMEM = "Akemi@Color speed,Dance;Hea
uint16_t mode_2Ddistortionwaves() { uint16_t mode_2Ddistortionwaves() {
if (!strip.isMatrix) return mode_oops(); // not a 2D set-up if (!strip.isMatrix) return mode_oops(); // not a 2D set-up
const uint16_t cols = SEGMENT.virtualWidth(); const uint_fast16_t cols = SEGMENT.virtualWidth();
const uint16_t rows = SEGMENT.virtualHeight(); const uint_fast16_t rows = SEGMENT.virtualHeight();
if (SEGENV.call == 0) { if (SEGENV.call == 0) {
SEGMENT.setUpLeds(); //SEGMENT.setUpLeds();
SEGMENT.fill(BLACK); SEGMENT.fill(BLACK);
} }

View File

@@ -19,6 +19,7 @@
#if !defined(FASTLED_VERSION) // pull in FastLED if we don't have it yet (we need the CRGB type) #if !defined(FASTLED_VERSION) // pull in FastLED if we don't have it yet (we need the CRGB type)
#define FASTLED_INTERNAL #define FASTLED_INTERNAL
#define USE_GET_MILLISECOND_TIMER
#include <FastLED.h> #include <FastLED.h>
#endif #endif
@@ -162,7 +163,8 @@ inline __attribute__((hot)) CRGB ColorFromPaletteWLED(const CRGBPalette16& pal,
green1 = (green1 * scale) >> 8; green1 = (green1 * scale) >> 8;
blue1 = (blue1 * scale) >> 8; blue1 = (blue1 * scale) >> 8;
} }
return RGBW32(red1,green1,blue1,0); //return RGBW32(red1,green1,blue1,0);
return CRGB(red1,green1,blue1);
} }
#define ColorFromPalette ColorFromPaletteWLED // override fastled function #define ColorFromPalette ColorFromPaletteWLED // override fastled function

View File

@@ -486,6 +486,9 @@ IRAM_ATTR_YN __attribute__((hot)) uint8_t gamma8_slow(uint8_t b) // WLEDMM add
return gammaT[b]; return gammaT[b];
} }
#if defined(ARDUINO_ARCH_ESP32)
// WLEDMM: gamma32() moved to fcn_declare.h (inlining for speed)
#else
// used for color gamma correction // used for color gamma correction
IRAM_ATTR_YN uint32_t __attribute__((hot)) gamma32(uint32_t color) IRAM_ATTR_YN uint32_t __attribute__((hot)) gamma32(uint32_t color)
{ {
@@ -500,3 +503,4 @@ IRAM_ATTR_YN uint32_t __attribute__((hot)) gamma32(uint32_t color)
b = gammaT[b]; b = gammaT[b];
return RGBW32(r, g, b, w); return RGBW32(r, g, b, w);
} }
#endif

View File

@@ -79,7 +79,6 @@ void setRandomColor(byte* rgb);
uint8_t gamma8_cal(uint8_t b, float gamma); uint8_t gamma8_cal(uint8_t b, float gamma);
void calcGammaTable(float gamma); void calcGammaTable(float gamma);
uint8_t __attribute__((pure)) gamma8_slow(uint8_t b); // WLEDMM: added attribute pure uint8_t __attribute__((pure)) gamma8_slow(uint8_t b); // WLEDMM: added attribute pure
uint32_t __attribute__((pure)) gamma32(uint32_t); // WLEDMM: added attribute pure
uint8_t unGamma8(uint8_t value); // WLEDMM revert gamma correction uint8_t unGamma8(uint8_t value); // WLEDMM revert gamma correction
uint32_t unGamma24(uint32_t c); // WLEDMM for 24bit color (white left as-is) uint32_t unGamma24(uint32_t c); // WLEDMM for 24bit color (white left as-is)
@@ -89,6 +88,34 @@ extern uint8_t gammaT[256]; // colors.cpp
inline uint8_t gamma8(uint8_t value) { return gammaT[value];} // WLEDMM inlined for speed inline uint8_t gamma8(uint8_t value) { return gammaT[value];} // WLEDMM inlined for speed
inline uint8_t fast_unGamma8(uint8_t value) { return gammaTinv[value];} inline uint8_t fast_unGamma8(uint8_t value) { return gammaTinv[value];}
#if defined(ARDUINO_ARCH_ESP32)
#if !defined(RGBW32) // WLEDMM define color macros in case they are missing
#define RGBW32(r,g,b,w) (uint32_t((byte(w) << 24) | (byte(r) << 16) | (byte(g) << 8) | (byte(b))))
#endif
#if !defined(W) && !defined(R) // WLEDMM define color macros in case they are missing
#define R(c) (byte((c) >> 16))
#define G(c) (byte((c) >> 8))
#define B(c) (byte(c))
#define W(c) (byte((c) >> 24))
#endif
extern bool gammaCorrectCol; // wled.h
// WLEDMM: defined in the header so the compiler can inline it.
// Runs each 8-bit channel (W, R, G, B) of a packed 32-bit color through the gammaT lookup table
// and re-packs the result. When gammaCorrectCol is false the color is returned untouched.
inline uint32_t __attribute__((hot)) gamma32(uint32_t color) {
  if (gammaCorrectCol) {
    // split into channels first, then map every channel through the table
    const uint8_t whiteIn = W(color);
    const uint8_t redIn   = R(color);
    const uint8_t greenIn = G(color);
    const uint8_t blueIn  = B(color);
    const uint8_t whiteOut = gammaT[whiteIn];
    const uint8_t redOut   = gammaT[redIn];
    const uint8_t greenOut = gammaT[greenIn];
    const uint8_t blueOut  = gammaT[blueIn];
    return RGBW32(redOut, greenOut, blueOut, whiteOut);
  }
  return color; // gamma correction for colors is switched off
}
#else
uint32_t __attribute__((pure)) gamma32(uint32_t);
#endif
#define gamma32inv(c) unGamma24(c) // WLEDMM alias for upstream compatibility #define gamma32inv(c) unGamma24(c) // WLEDMM alias for upstream compatibility
#define gamma8inv(c) fast_unGamma8(c) // WLEDMM alias for upstream compatibility #define gamma8inv(c) fast_unGamma8(c) // WLEDMM alias for upstream compatibility
@@ -489,13 +516,48 @@ void clearEEPROM();
#endif #endif
//wled_math.cpp //wled_math.cpp
void init_math();
// WLEDMM: math functions inlined for speed
// Integer sine using Bhaskara I's approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x))
// theta:   angle as unsigned 16 bit value (0-65535 covers one full period)
// returns: signed 16 bit sine value (-32767 to +32767)
// optimized integer implementation by @dedehai
inline int16_t sin16_t(uint16_t theta) {
  // the second half of the period (pi .. 2*pi) mirrors the first half with negative sign
  const bool negativeHalf = theta > 0x7FFF;
  if (negativeHalf) {
    theta = 0xFFFF - theta;
  }
  const uint32_t product = theta * (0x7FFF - theta);              // x * (pi - x) in integer units
  const uint64_t numerator = (uint64_t)product * (4 * 0x7FFF);    // needs 64 bit to avoid overflow
  const int32_t denominator = 1342095361 - product;               // 1342095361 is 5 * 0x7FFF^2 / 4
  const int16_t magnitude = numerator / denominator;
  return negativeHalf ? -magnitude : magnitude;
}
// Integer cosine, derived from sin16_t via cos(x) = sin(x + pi/2).
// A quarter period equals 0x4000 in the 16 bit angle representation; the sum wraps at 16 bit.
inline int16_t cos16_t(uint16_t theta) {
  const uint16_t quarterAhead = theta + 0x4000;
  return sin16_t(quarterAhead);
}
#if defined(ARDUINO_ARCH_ESP32)
// WLEDMM: on ESP32, sin8_t is a plain table lookup.
// sinT is defined in wled_math.cpp and filled by init_math().
extern uint8_t sinT[256];
inline uint8_t sin8_t(uint8_t theta) {
  return sinT[theta];
}
#else
// no LUT on 8266, to save 256 bytes of RAM
inline uint8_t sin8_t(uint8_t theta) {
int32_t sin16 = sin16_t((uint16_t)theta * 257); // 255 * 257 = 0xFFFF
sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding
return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow
}
#endif
// 8 bit cosine, derived from sin8_t via cos(x) = sin(x + pi/2).
// A quarter period equals 64 in the 8 bit angle representation; the sum wraps at 8 bit.
inline uint8_t cos8_t(uint8_t theta) {
  const uint8_t quarterAhead = theta + 64;
  return sin8_t(quarterAhead);
}
//float cos_t(float phi); // use float math //float cos_t(float phi); // use float math
//float sin_t(float phi); //float sin_t(float phi);
//float tan_t(float x); //float tan_t(float x);
int16_t sin16_t(uint16_t theta);
int16_t cos16_t(uint16_t theta);
uint8_t sin8_t(uint8_t theta);
uint8_t cos8_t(uint8_t theta);
float sin_approx(float theta); // uses integer math (converted to float), accuracy +/-0.0015 (compared to sinf()) float sin_approx(float theta); // uses integer math (converted to float), accuracy +/-0.0015 (compared to sinf())
float cos_approx(float theta); float cos_approx(float theta);

View File

@@ -843,7 +843,7 @@ static inline int32_t lerpPerlin(int32_t a, int32_t b, int32_t t) {
} }
// 1D Perlin noise function that returns a value in range of -24691 to 24689 // 1D Perlin noise function that returns a value in range of -24691 to 24689
int32_t perlin1D_raw(uint32_t x, bool is16bit) { int32_t IRAM_ATTR_YN perlin1D_raw(uint32_t x, bool is16bit) {
// integer and fractional part coordinates // integer and fractional part coordinates
int32_t x0 = x >> 16; int32_t x0 = x >> 16;
int32_t x1 = x0 + 1; int32_t x1 = x0 + 1;
@@ -861,7 +861,7 @@ int32_t perlin1D_raw(uint32_t x, bool is16bit) {
} }
// 2D Perlin noise function that returns a value in range of -20633 to 20629 // 2D Perlin noise function that returns a value in range of -20633 to 20629
int32_t perlin2D_raw(uint32_t x, uint32_t y, bool is16bit) { int32_t IRAM_ATTR_YN perlin2D_raw(uint32_t x, uint32_t y, bool is16bit) {
int32_t x0 = x >> 16; int32_t x0 = x >> 16;
int32_t y0 = y >> 16; int32_t y0 = y >> 16;
int32_t x1 = x0 + 1; int32_t x1 = x0 + 1;
@@ -893,7 +893,7 @@ int32_t perlin2D_raw(uint32_t x, uint32_t y, bool is16bit) {
} }
// 3D Perlin noise function that returns a value in range of -16788 to 16381 // 3D Perlin noise function that returns a value in range of -16788 to 16381
int32_t perlin3D_raw(uint32_t x, uint32_t y, uint32_t z, bool is16bit) { int32_t IRAM_ATTR_YN perlin3D_raw(uint32_t x, uint32_t y, uint32_t z, bool is16bit) {
int32_t x0 = x >> 16; int32_t x0 = x >> 16;
int32_t y0 = y >> 16; int32_t y0 = y >> 16;
int32_t z0 = z >> 16; int32_t z0 = z >> 16;

View File

@@ -474,6 +474,8 @@ void WLED::setup()
if (!Serial) delay(300); // just a tiny wait to avoid problems later when acessing serial if (!Serial) delay(300); // just a tiny wait to avoid problems later when acessing serial
#endif #endif
init_math(); // WLEDMM: pre-calculate some lookup tables
#ifdef ARDUINO_ARCH_ESP32 #ifdef ARDUINO_ARCH_ESP32
#if defined(WLED_DEBUG) && (defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3) || ARDUINO_USB_CDC_ON_BOOT) #if defined(WLED_DEBUG) && (defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3) || ARDUINO_USB_CDC_ON_BOOT)
if (!Serial) delay(2500); // WLEDMM allow CDC USB serial to initialise (WLED_DEBUG only) if (!Serial) delay(2500); // WLEDMM allow CDC USB serial to initialise (WLED_DEBUG only)

View File

@@ -59,10 +59,18 @@ float tan_t(float x) {
} }
*/ */
// WLEDMM: sin16_t() moved to fcn_declare.h (inlining for speed)
// WLEDMM: cos16_t() moved to fcn_declare.h (inlining for speed)
// WLEDMM: sin8_t() moved to fcn_declare.h (inlining for speed)
// WLEDMM: cos8_t() moved to fcn_declare.h (inlining for speed)
// 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x)) // 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x))
// input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767) // input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767)
// optimized integer implementation by @dedehai // optimized integer implementation by @dedehai
int16_t sin16_t(uint16_t theta) { static int16_t sin16_calc(uint16_t theta) {
int scale = 1; int scale = 1;
if (theta > 0x7FFF) { if (theta > 0x7FFF) {
theta = 0xFFFF - theta; theta = 0xFFFF - theta;
@@ -75,30 +83,34 @@ int16_t sin16_t(uint16_t theta) {
return result * scale; return result * scale;
} }
int16_t cos16_t(uint16_t theta) { #if defined(ARDUINO_ARCH_ESP32)
return sin16_t(theta + 0x4000); //cos(x) = sin(x+pi/2) static uint8_t sin8_calc(uint8_t theta) {
} int32_t sin16 = sin16_calc((uint16_t)theta * 257); // 255 * 257 = 0xFFFF
uint8_t sin8_t(uint8_t theta) {
int32_t sin16 = sin16_t((uint16_t)theta * 257); // 255 * 257 = 0xFFFF
sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding
return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow
} }
uint8_t cos8_t(uint8_t theta) { // WLEDMM: pre-calculate lookup-table for sin8_t
return sin8_t(theta + 64); //cos(x) = sin(x+pi/2) uint8_t DRAM_ATTR sinT[256];
void init_math(void) {
for (unsigned i = 0; i < 256; i++)
sinT[i] = sin8_calc(i);
} }
#else
void init_math(void) { return;} // dummy for 8266
#endif
float sin_approx(float theta) { float sin_approx(float theta) {
uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3) uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3)
int32_t result = sin16_t(scaled_theta); int32_t result = sin16_calc(scaled_theta);
float sin = float(result) / 0x7FFF; float sin = float(result) / 0x7FFF;
return sin; return sin;
} }
float cos_approx(float theta) { float cos_approx(float theta) {
uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3) uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3)
int32_t result = sin16_t(scaled_theta + 0x4000); int32_t result = sin16_calc(scaled_theta + 0x4000);
float cos = float(result) / 0x7FFF; float cos = float(result) / 0x7FFF;
return cos; return cos;
} }