effect math sppedup - up to 3x faster

-> distortion waves 3x speedup -> hiphotic 2x speedup -> waving cell 1.5x speedup * replace sin8_t by lookup-table with pre-computed values * moved integer sin and cos to fcn_declare.h (inlined by the compiler) * moved gamma32 to fcn_declare.h (inlined by the compiler) * a few other small tweaks
2025-12-17 21:59:22 +01:00
parent 49e6de33c7
commit bc7cf062e8
7 changed files with 107 additions and 25 deletions
--- a/wled00/fcn_declare.h
+++ b/wled00/fcn_declare.h
@@ -79,7 +79,6 @@ void setRandomColor(byte* rgb);
 uint8_t gamma8_cal(uint8_t b, float gamma);
 void calcGammaTable(float gamma);
 uint8_t __attribute__((pure)) gamma8_slow(uint8_t b);                                         // WLEDMM: added attribute pure
-uint32_t __attribute__((pure)) gamma32(uint32_t);                                             // WLEDMM: added attribute pure
 uint8_t unGamma8(uint8_t value);                                                              // WLEDMM revert gamma correction
 uint32_t unGamma24(uint32_t c);                                                               // WLEDMM for 24bit color (white left as-is)

@@ -89,6 +88,34 @@ extern uint8_t gammaT[256];    // colors.cpp
 inline uint8_t gamma8(uint8_t value) { return gammaT[value];}           // WLEDMM inlined for speed
 inline uint8_t fast_unGamma8(uint8_t value) { return gammaTinv[value];}

+#if defined(ARDUINO_ARCH_ESP32)
+#if !defined(RGBW32)           // WLEDMM define color macros in case they are missing
+#define RGBW32(r,g,b,w) (uint32_t((byte(w) << 24) | (byte(r) << 16) | (byte(g) << 8) | (byte(b))))
+#endif
+#if !defined(W) && !defined(R) // WLEDMM define color macros in case they are missing
+#define R(c) (byte((c) >> 16))
+#define G(c) (byte((c) >> 8))
+#define B(c) (byte(c))
+#define W(c) (byte((c) >> 24))
+#endif
+
+extern bool gammaCorrectCol;   // wled.h
+inline uint32_t __attribute__((hot)) gamma32(uint32_t color) {          // WLEDMM: moved here for inlining
+  if (!gammaCorrectCol) return color;
+  uint8_t w = W(color);
+  uint8_t r = R(color);
+  uint8_t g = G(color);
+  uint8_t b = B(color);
+  w = gammaT[w];
+  r = gammaT[r];
+  g = gammaT[g];
+  b = gammaT[b];
+  return RGBW32(r, g, b, w);
+}
+#else
+uint32_t __attribute__((pure)) gamma32(uint32_t);
+#endif
+
 #define gamma32inv(c) unGamma24(c)      // WLEDMM alias for upstream compatibility
 #define gamma8inv(c)  fast_unGamma8(c)  // WLEDMM alias for upstream compatibility

@@ -489,13 +516,48 @@ void clearEEPROM();
 #endif

 //wled_math.cpp
+void init_math();
+
+// WLEDMM: math functions inlined for speed
+
+// 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x))
+// input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767)
+// optimized integer implementation by @dedehai
+inline int16_t sin16_t(uint16_t theta) {
+  int scale = 1;
+  if (theta > 0x7FFF) {
+    theta = 0xFFFF - theta;
+    scale = -1; // second half of the sine function is negative (pi - 2*pi)
+  }
+  uint32_t precal = theta * (0x7FFF - theta);
+  uint64_t numerator = (uint64_t)precal * (4 * 0x7FFF); // 64bit required
+  int32_t denominator = 1342095361 - precal; // 1342095361 is 5 * 0x7FFF^2 / 4
+  int16_t result = numerator / denominator;
+  return result * scale;
+}
+inline int16_t cos16_t(uint16_t theta) {
+  return sin16_t(theta + 0x4000); //cos(x) = sin(x+pi/2)
+}
+
+#if defined(ARDUINO_ARCH_ESP32)
+// WLEDMM: use pre-calculated lookup-table for sin8_t
+extern uint8_t sinT[256];    // wled_math.cpp
+inline uint8_t sin8_t(uint8_t theta) { return sinT[theta];}           
+#else
+// no LUT on 8266, to save 256 bytes of RAM
+inline uint8_t sin8_t(uint8_t theta) {
+  int32_t sin16 = sin16_t((uint16_t)theta * 257); // 255 * 257 = 0xFFFF
+  sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding
+  return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow
+}
+#endif
+inline uint8_t cos8_t(uint8_t theta) {
+  return sin8_t(theta + 64); //cos(x) = sin(x+pi/2)
+}
+
 //float cos_t(float phi); // use float math
 //float sin_t(float phi);
 //float tan_t(float x);
-int16_t sin16_t(uint16_t theta);
-int16_t cos16_t(uint16_t theta);
-uint8_t sin8_t(uint8_t theta);
-uint8_t cos8_t(uint8_t theta);

 float sin_approx(float theta); // uses integer math (converted to float), accuracy +/-0.0015 (compared to sinf())
 float cos_approx(float theta);