effect math sppedup - up to 3x faster

-> distortion waves 3x speedup
-> hiphotic 2x speedup
-> waving cell 1.5x speedup

* replace sin8_t by lookup-table with pre-computed values
* moved integer sin and cos to fcn_declare.h (inlined by the compiler)
* moved gamma32 to fcn_declare.h (inlined by the compiler)
* a few other small tweaks
This commit is contained in:
Frank
2025-12-17 21:59:22 +01:00
parent 49e6de33c7
commit bc7cf062e8
7 changed files with 107 additions and 25 deletions

View File

@@ -79,7 +79,6 @@ void setRandomColor(byte* rgb);
uint8_t gamma8_cal(uint8_t b, float gamma);
void calcGammaTable(float gamma);
uint8_t __attribute__((pure)) gamma8_slow(uint8_t b); // WLEDMM: added attribute pure
uint32_t __attribute__((pure)) gamma32(uint32_t); // WLEDMM: added attribute pure
uint8_t unGamma8(uint8_t value); // WLEDMM revert gamma correction
uint32_t unGamma24(uint32_t c); // WLEDMM for 24bit color (white left as-is)
@@ -89,6 +88,34 @@ extern uint8_t gammaT[256]; // colors.cpp
inline uint8_t gamma8(uint8_t value) { return gammaT[value];} // WLEDMM inlined for speed
inline uint8_t fast_unGamma8(uint8_t value) { return gammaTinv[value];}
#if defined(ARDUINO_ARCH_ESP32)
#if !defined(RGBW32) // WLEDMM define color macros in case they are missing
#define RGBW32(r,g,b,w) (uint32_t((byte(w) << 24) | (byte(r) << 16) | (byte(g) << 8) | (byte(b))))
#endif
#if !defined(W) && !defined(R) // WLEDMM define color macros in case they are missing
#define R(c) (byte((c) >> 16))
#define G(c) (byte((c) >> 8))
#define B(c) (byte(c))
#define W(c) (byte((c) >> 24))
#endif
extern bool gammaCorrectCol; // wled.h
inline uint32_t __attribute__((hot)) gamma32(uint32_t color) { // WLEDMM: moved here for inlining
if (!gammaCorrectCol) return color;
uint8_t w = W(color);
uint8_t r = R(color);
uint8_t g = G(color);
uint8_t b = B(color);
w = gammaT[w];
r = gammaT[r];
g = gammaT[g];
b = gammaT[b];
return RGBW32(r, g, b, w);
}
#else
uint32_t __attribute__((pure)) gamma32(uint32_t);
#endif
#define gamma32inv(c) unGamma24(c) // WLEDMM alias for upstream compatibility
#define gamma8inv(c) fast_unGamma8(c) // WLEDMM alias for upstream compatibility
@@ -489,13 +516,48 @@ void clearEEPROM();
#endif
//wled_math.cpp
void init_math();
// WLEDMM: math functions inlined for speed
// 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x))
// input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767)
// optimized integer implementation by @dedehai
inline int16_t sin16_t(uint16_t theta) {
int scale = 1;
if (theta > 0x7FFF) {
theta = 0xFFFF - theta;
scale = -1; // second half of the sine function is negative (pi - 2*pi)
}
uint32_t precal = theta * (0x7FFF - theta);
uint64_t numerator = (uint64_t)precal * (4 * 0x7FFF); // 64bit required
int32_t denominator = 1342095361 - precal; // 1342095361 is 5 * 0x7FFF^2 / 4
int16_t result = numerator / denominator;
return result * scale;
}
inline int16_t cos16_t(uint16_t theta) {
return sin16_t(theta + 0x4000); //cos(x) = sin(x+pi/2)
}
#if defined(ARDUINO_ARCH_ESP32)
// WLEDMM: use pre-calculated lookup-table for sin8_t
extern uint8_t sinT[256]; // wled_math.cpp
inline uint8_t sin8_t(uint8_t theta) { return sinT[theta];}
#else
// no LUT on 8266, to save 256 bytes of RAM
inline uint8_t sin8_t(uint8_t theta) {
int32_t sin16 = sin16_t((uint16_t)theta * 257); // 255 * 257 = 0xFFFF
sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding
return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow
}
#endif
inline uint8_t cos8_t(uint8_t theta) {
return sin8_t(theta + 64); //cos(x) = sin(x+pi/2)
}
//float cos_t(float phi); // use float math
//float sin_t(float phi);
//float tan_t(float x);
int16_t sin16_t(uint16_t theta);
int16_t cos16_t(uint16_t theta);
uint8_t sin8_t(uint8_t theta);
uint8_t cos8_t(uint8_t theta);
float sin_approx(float theta); // uses integer math (converted to float), accuracy +/-0.0015 (compared to sinf())
float cos_approx(float theta);