diff --git a/wled00/FX.cpp b/wled00/FX.cpp index a4a6822c..26dfd110 100644 --- a/wled00/FX.cpp +++ b/wled00/FX.cpp @@ -5883,8 +5883,8 @@ static const char _data_FX_MODE_2DSNOWFALL[] PROGMEM = "Snow Fall ☾@!,Spawn Ra uint16_t mode_2DHiphotic() { // By: ldirko https://editor.soulmatelights.com/gallery/810 , Modified by: Andrew Tuline if (!strip.isMatrix) return mode_oops(); // not a 2D set-up - const uint16_t cols = SEGMENT.virtualWidth(); - const uint16_t rows = SEGMENT.virtualHeight(); + const uint_fast16_t cols = SEGMENT.virtualWidth(); + const uint_fast16_t rows = SEGMENT.virtualHeight(); const uint32_t a = strip.now / ((SEGMENT.custom3>>1)+1); for (int x = 0; x < cols; x++) { @@ -8654,10 +8654,10 @@ static const char _data_FX_MODE_2DAKEMI[] PROGMEM = "Akemi@Color speed,Dance;Hea uint16_t mode_2Ddistortionwaves() { if (!strip.isMatrix) return mode_oops(); // not a 2D set-up - const uint16_t cols = SEGMENT.virtualWidth(); - const uint16_t rows = SEGMENT.virtualHeight(); + const uint_fast16_t cols = SEGMENT.virtualWidth(); + const uint_fast16_t rows = SEGMENT.virtualHeight(); if (SEGENV.call == 0) { - SEGMENT.setUpLeds(); + //SEGMENT.setUpLeds(); SEGMENT.fill(BLACK); } diff --git a/wled00/colorTools.hpp b/wled00/colorTools.hpp index 27249e31..a2986f94 100644 --- a/wled00/colorTools.hpp +++ b/wled00/colorTools.hpp @@ -19,6 +19,7 @@ #if !defined(FASTLED_VERSION) // pull in FastLED if we don't have it yet (we need the CRGB type) #define FASTLED_INTERNAL + #define USE_GET_MILLISECOND_TIMER #include #endif @@ -162,7 +163,8 @@ inline __attribute__((hot)) CRGB ColorFromPaletteWLED(const CRGBPalette16& pal, green1 = (green1 * scale) >> 8; blue1 = (blue1 * scale) >> 8; } - return RGBW32(red1,green1,blue1,0); + //return RGBW32(red1,green1,blue1,0); + return CRGB(red1,green1,blue1); } #define ColorFromPalette ColorFromPaletteWLED // override fastled function diff --git a/wled00/colors.cpp b/wled00/colors.cpp index 33b37180..560e4855 100644 --- a/wled00/colors.cpp +++ b/wled00/colors.cpp @@ -486,6 +486,9 @@ IRAM_ATTR_YN __attribute__((hot)) uint8_t gamma8_slow(uint8_t b) // WLEDMM add return gammaT[b]; } +#if defined(ARDUINO_ARCH_ESP32) +// WLEDMM: gamma32() moved to fcn_declare.h (inlining for speed) +#else // used for color gamma correction IRAM_ATTR_YN uint32_t __attribute__((hot)) gamma32(uint32_t color) { @@ -500,3 +503,4 @@ IRAM_ATTR_YN uint32_t __attribute__((hot)) gamma32(uint32_t color) b = gammaT[b]; return RGBW32(r, g, b, w); } +#endif \ No newline at end of file diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h index e114bc9f..73b23566 100644 --- a/wled00/fcn_declare.h +++ b/wled00/fcn_declare.h @@ -79,7 +79,6 @@ void setRandomColor(byte* rgb); uint8_t gamma8_cal(uint8_t b, float gamma); void calcGammaTable(float gamma); uint8_t __attribute__((pure)) gamma8_slow(uint8_t b); // WLEDMM: added attribute pure -uint32_t __attribute__((pure)) gamma32(uint32_t); // WLEDMM: added attribute pure uint8_t unGamma8(uint8_t value); // WLEDMM revert gamma correction uint32_t unGamma24(uint32_t c); // WLEDMM for 24bit color (white left as-is) @@ -89,6 +88,34 @@ extern uint8_t gammaT[256]; // colors.cpp inline uint8_t gamma8(uint8_t value) { return gammaT[value];} // WLEDMM inlined for speed inline uint8_t fast_unGamma8(uint8_t value) { return gammaTinv[value];} +#if defined(ARDUINO_ARCH_ESP32) +#if !defined(RGBW32) // WLEDMM define color macros in case they are missing +#define RGBW32(r,g,b,w) (uint32_t((byte(w) << 24) | (byte(r) << 16) | (byte(g) << 8) | (byte(b)))) +#endif +#if !defined(W) && !defined(R) // WLEDMM define color macros in case they are missing +#define R(c) (byte((c) >> 16)) +#define G(c) (byte((c) >> 8)) +#define B(c) (byte(c)) +#define W(c) (byte((c) >> 24)) +#endif + +extern bool gammaCorrectCol; // wled.h +inline uint32_t __attribute__((hot)) gamma32(uint32_t color) { // WLEDMM: moved here for inlining + if (!gammaCorrectCol) return color; + uint8_t w = W(color); + uint8_t r = R(color); + uint8_t g = G(color); + uint8_t b = B(color); + w = gammaT[w]; + r = gammaT[r]; + g = gammaT[g]; + b = gammaT[b]; + return RGBW32(r, g, b, w); +} +#else +uint32_t __attribute__((pure)) gamma32(uint32_t); +#endif + #define gamma32inv(c) unGamma24(c) // WLEDMM alias for upstream compatibility #define gamma8inv(c) fast_unGamma8(c) // WLEDMM alias for upstream compatibility @@ -489,13 +516,48 @@ void clearEEPROM(); #endif //wled_math.cpp +void init_math(); + +// WLEDMM: math functions inlined for speed + +// 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x)) +// input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767) +// optimized integer implementation by @dedehai +inline int16_t sin16_t(uint16_t theta) { + int scale = 1; + if (theta > 0x7FFF) { + theta = 0xFFFF - theta; + scale = -1; // second half of the sine function is negative (pi - 2*pi) + } + uint32_t precal = theta * (0x7FFF - theta); + uint64_t numerator = (uint64_t)precal * (4 * 0x7FFF); // 64bit required + int32_t denominator = 1342095361 - precal; // 1342095361 is 5 * 0x7FFF^2 / 4 + int16_t result = numerator / denominator; + return result * scale; +} +inline int16_t cos16_t(uint16_t theta) { + return sin16_t(theta + 0x4000); //cos(x) = sin(x+pi/2) +} + +#if defined(ARDUINO_ARCH_ESP32) +// WLEDMM: use pre-calculated lookup-table for sin8_t +extern uint8_t sinT[256]; // wled_math.cpp +inline uint8_t sin8_t(uint8_t theta) { return sinT[theta];} +#else +// no LUT on 8266, to save 256 bytes of RAM +inline uint8_t sin8_t(uint8_t theta) { + int32_t sin16 = sin16_t((uint16_t)theta * 257); // 255 * 257 = 0xFFFF + sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding + return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow +} +#endif +inline uint8_t cos8_t(uint8_t theta) { + return sin8_t(theta + 64); //cos(x) = sin(x+pi/2) +} + //float cos_t(float phi); // use float math //float sin_t(float phi); //float tan_t(float x); -int16_t sin16_t(uint16_t theta); -int16_t cos16_t(uint16_t theta); -uint8_t sin8_t(uint8_t theta); -uint8_t cos8_t(uint8_t theta); float sin_approx(float theta); // uses integer math (converted to float), accuracy +/-0.0015 (compared to sinf()) float cos_approx(float theta); diff --git a/wled00/util.cpp b/wled00/util.cpp index c47a7845..63e8b131 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -843,7 +843,7 @@ static inline int32_t lerpPerlin(int32_t a, int32_t b, int32_t t) { } // 1D Perlin noise function that returns a value in range of -24691 to 24689 -int32_t perlin1D_raw(uint32_t x, bool is16bit) { +int32_t IRAM_ATTR_YN perlin1D_raw(uint32_t x, bool is16bit) { // integer and fractional part coordinates int32_t x0 = x >> 16; int32_t x1 = x0 + 1; @@ -861,7 +861,7 @@ int32_t perlin1D_raw(uint32_t x, bool is16bit) { } // 2D Perlin noise function that returns a value in range of -20633 to 20629 -int32_t perlin2D_raw(uint32_t x, uint32_t y, bool is16bit) { +int32_t IRAM_ATTR_YN perlin2D_raw(uint32_t x, uint32_t y, bool is16bit) { int32_t x0 = x >> 16; int32_t y0 = y >> 16; int32_t x1 = x0 + 1; @@ -893,7 +893,7 @@ int32_t perlin2D_raw(uint32_t x, uint32_t y, bool is16bit) { } // 3D Perlin noise function that returns a value in range of -16788 to 16381 -int32_t perlin3D_raw(uint32_t x, uint32_t y, uint32_t z, bool is16bit) { +int32_t IRAM_ATTR_YN perlin3D_raw(uint32_t x, uint32_t y, uint32_t z, bool is16bit) { int32_t x0 = x >> 16; int32_t y0 = y >> 16; int32_t z0 = z >> 16; diff --git a/wled00/wled.cpp b/wled00/wled.cpp index a91280e5..7274f2aa 100644 --- a/wled00/wled.cpp +++ b/wled00/wled.cpp @@ -474,6 +474,8 @@ void WLED::setup() if (!Serial) delay(300); // just a tiny wait to avoid problems later when acessing serial #endif + init_math(); // WLEDMM: pre-calculate some lookup tables + #ifdef ARDUINO_ARCH_ESP32 #if defined(WLED_DEBUG) && (defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3) || ARDUINO_USB_CDC_ON_BOOT) if (!Serial) delay(2500); // WLEDMM allow CDC USB serial to initialise (WLED_DEBUG only) diff --git a/wled00/wled_math.cpp b/wled00/wled_math.cpp index 1a6c9609..4905f194 100644 --- a/wled00/wled_math.cpp +++ b/wled00/wled_math.cpp @@ -59,10 +59,18 @@ float tan_t(float x) { } */ +// WLEDMM: sin16_t() moved to fcn_declare.h (inlining for speed) + +// WLEDMM: cos16_t() moved to fcn_declare.h (inlining for speed) + +// WLEDMM: sin8_t() moved to fcn_declare.h (inlining for speed) + +// WLEDMM: cos8_t() moved to fcn_declare.h (inlining for speed) + // 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x)) // input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767) // optimized integer implementation by @dedehai -int16_t sin16_t(uint16_t theta) { +static int16_t sin16_calc(uint16_t theta) { int scale = 1; if (theta > 0x7FFF) { theta = 0xFFFF - theta; @@ -75,30 +83,34 @@ int16_t sin16_t(uint16_t theta) { return result * scale; } -int16_t cos16_t(uint16_t theta) { - return sin16_t(theta + 0x4000); //cos(x) = sin(x+pi/2) -} - -uint8_t sin8_t(uint8_t theta) { - int32_t sin16 = sin16_t((uint16_t)theta * 257); // 255 * 257 = 0xFFFF +#if defined(ARDUINO_ARCH_ESP32) +static uint8_t sin8_calc(uint8_t theta) { + int32_t sin16 = sin16_calc((uint16_t)theta * 257); // 255 * 257 = 0xFFFF sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow } -uint8_t cos8_t(uint8_t theta) { - return sin8_t(theta + 64); //cos(x) = sin(x+pi/2) +// WLEDMM: pre-calculate lookup-table for sin8_t +uint8_t DRAM_ATTR sinT[256]; +void init_math(void) { + for (unsigned i = 0; i < 256; i++) + sinT[i] = sin8_calc(i); } +#else +void init_math(void) { return;} // dummy for 8266 +#endif + float sin_approx(float theta) { uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3) - int32_t result = sin16_t(scaled_theta); + int32_t result = sin16_calc(scaled_theta); float sin = float(result) / 0x7FFF; return sin; } float cos_approx(float theta) { uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3) - int32_t result = sin16_t(scaled_theta + 0x4000); + int32_t result = sin16_calc(scaled_theta + 0x4000); float cos = float(result) / 0x7FFF; return cos; }