effect math speedup - up to 3x faster
-> distortion waves: 3x speedup
-> hiphotic: 2x speedup
-> waving cell: 1.5x speedup
* replaced sin8_t with a lookup table of pre-computed values
* moved integer sin and cos to fcn_declare.h (inlined by the compiler)
* moved gamma32 to fcn_declare.h (inlined by the compiler)
* a few other small tweaks
This commit is contained in:
@@ -5883,8 +5883,8 @@ static const char _data_FX_MODE_2DSNOWFALL[] PROGMEM = "Snow Fall ☾@!,Spawn Ra
|
|||||||
uint16_t mode_2DHiphotic() { // By: ldirko https://editor.soulmatelights.com/gallery/810 , Modified by: Andrew Tuline
|
uint16_t mode_2DHiphotic() { // By: ldirko https://editor.soulmatelights.com/gallery/810 , Modified by: Andrew Tuline
|
||||||
if (!strip.isMatrix) return mode_oops(); // not a 2D set-up
|
if (!strip.isMatrix) return mode_oops(); // not a 2D set-up
|
||||||
|
|
||||||
const uint16_t cols = SEGMENT.virtualWidth();
|
const uint_fast16_t cols = SEGMENT.virtualWidth();
|
||||||
const uint16_t rows = SEGMENT.virtualHeight();
|
const uint_fast16_t rows = SEGMENT.virtualHeight();
|
||||||
const uint32_t a = strip.now / ((SEGMENT.custom3>>1)+1);
|
const uint32_t a = strip.now / ((SEGMENT.custom3>>1)+1);
|
||||||
|
|
||||||
for (int x = 0; x < cols; x++) {
|
for (int x = 0; x < cols; x++) {
|
||||||
@@ -8654,10 +8654,10 @@ static const char _data_FX_MODE_2DAKEMI[] PROGMEM = "Akemi@Color speed,Dance;Hea
|
|||||||
uint16_t mode_2Ddistortionwaves() {
|
uint16_t mode_2Ddistortionwaves() {
|
||||||
if (!strip.isMatrix) return mode_oops(); // not a 2D set-up
|
if (!strip.isMatrix) return mode_oops(); // not a 2D set-up
|
||||||
|
|
||||||
const uint16_t cols = SEGMENT.virtualWidth();
|
const uint_fast16_t cols = SEGMENT.virtualWidth();
|
||||||
const uint16_t rows = SEGMENT.virtualHeight();
|
const uint_fast16_t rows = SEGMENT.virtualHeight();
|
||||||
if (SEGENV.call == 0) {
|
if (SEGENV.call == 0) {
|
||||||
SEGMENT.setUpLeds();
|
//SEGMENT.setUpLeds();
|
||||||
SEGMENT.fill(BLACK);
|
SEGMENT.fill(BLACK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
#if !defined(FASTLED_VERSION) // pull in FastLED if we don't have it yet (we need the CRGB type)
|
#if !defined(FASTLED_VERSION) // pull in FastLED if we don't have it yet (we need the CRGB type)
|
||||||
#define FASTLED_INTERNAL
|
#define FASTLED_INTERNAL
|
||||||
|
#define USE_GET_MILLISECOND_TIMER
|
||||||
#include <FastLED.h>
|
#include <FastLED.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -162,7 +163,8 @@ inline __attribute__((hot)) CRGB ColorFromPaletteWLED(const CRGBPalette16& pal,
|
|||||||
green1 = (green1 * scale) >> 8;
|
green1 = (green1 * scale) >> 8;
|
||||||
blue1 = (blue1 * scale) >> 8;
|
blue1 = (blue1 * scale) >> 8;
|
||||||
}
|
}
|
||||||
return RGBW32(red1,green1,blue1,0);
|
//return RGBW32(red1,green1,blue1,0);
|
||||||
|
return CRGB(red1,green1,blue1);
|
||||||
}
|
}
|
||||||
#define ColorFromPalette ColorFromPaletteWLED // override fastled function
|
#define ColorFromPalette ColorFromPaletteWLED // override fastled function
|
||||||
|
|
||||||
|
|||||||
@@ -486,6 +486,9 @@ IRAM_ATTR_YN __attribute__((hot)) uint8_t gamma8_slow(uint8_t b) // WLEDMM add
|
|||||||
return gammaT[b];
|
return gammaT[b];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(ARDUINO_ARCH_ESP32)
|
||||||
|
// WLEDMM: gamma32() moved to fcn_declare.h (inlining for speed)
|
||||||
|
#else
|
||||||
// used for color gamma correction
|
// used for color gamma correction
|
||||||
IRAM_ATTR_YN uint32_t __attribute__((hot)) gamma32(uint32_t color)
|
IRAM_ATTR_YN uint32_t __attribute__((hot)) gamma32(uint32_t color)
|
||||||
{
|
{
|
||||||
@@ -500,3 +503,4 @@ IRAM_ATTR_YN uint32_t __attribute__((hot)) gamma32(uint32_t color)
|
|||||||
b = gammaT[b];
|
b = gammaT[b];
|
||||||
return RGBW32(r, g, b, w);
|
return RGBW32(r, g, b, w);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
@@ -79,7 +79,6 @@ void setRandomColor(byte* rgb);
|
|||||||
uint8_t gamma8_cal(uint8_t b, float gamma);
|
uint8_t gamma8_cal(uint8_t b, float gamma);
|
||||||
void calcGammaTable(float gamma);
|
void calcGammaTable(float gamma);
|
||||||
uint8_t __attribute__((pure)) gamma8_slow(uint8_t b); // WLEDMM: added attribute pure
|
uint8_t __attribute__((pure)) gamma8_slow(uint8_t b); // WLEDMM: added attribute pure
|
||||||
uint32_t __attribute__((pure)) gamma32(uint32_t); // WLEDMM: added attribute pure
|
|
||||||
uint8_t unGamma8(uint8_t value); // WLEDMM revert gamma correction
|
uint8_t unGamma8(uint8_t value); // WLEDMM revert gamma correction
|
||||||
uint32_t unGamma24(uint32_t c); // WLEDMM for 24bit color (white left as-is)
|
uint32_t unGamma24(uint32_t c); // WLEDMM for 24bit color (white left as-is)
|
||||||
|
|
||||||
@@ -89,6 +88,34 @@ extern uint8_t gammaT[256]; // colors.cpp
|
|||||||
inline uint8_t gamma8(uint8_t value) { return gammaT[value];} // WLEDMM inlined for speed
|
inline uint8_t gamma8(uint8_t value) { return gammaT[value];} // WLEDMM inlined for speed
|
||||||
inline uint8_t fast_unGamma8(uint8_t value) { return gammaTinv[value];}
|
inline uint8_t fast_unGamma8(uint8_t value) { return gammaTinv[value];}
|
||||||
|
|
||||||
|
#if defined(ARDUINO_ARCH_ESP32)
|
||||||
|
#if !defined(RGBW32) // WLEDMM define color macros in case they are missing
|
||||||
|
#define RGBW32(r,g,b,w) (uint32_t((byte(w) << 24) | (byte(r) << 16) | (byte(g) << 8) | (byte(b))))
|
||||||
|
#endif
|
||||||
|
#if !defined(W) && !defined(R) // WLEDMM define color macros in case they are missing
|
||||||
|
#define R(c) (byte((c) >> 16))
|
||||||
|
#define G(c) (byte((c) >> 8))
|
||||||
|
#define B(c) (byte(c))
|
||||||
|
#define W(c) (byte((c) >> 24))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern bool gammaCorrectCol; // wled.h
|
||||||
|
inline uint32_t __attribute__((hot)) gamma32(uint32_t color) { // WLEDMM: moved here for inlining
|
||||||
|
if (!gammaCorrectCol) return color;
|
||||||
|
uint8_t w = W(color);
|
||||||
|
uint8_t r = R(color);
|
||||||
|
uint8_t g = G(color);
|
||||||
|
uint8_t b = B(color);
|
||||||
|
w = gammaT[w];
|
||||||
|
r = gammaT[r];
|
||||||
|
g = gammaT[g];
|
||||||
|
b = gammaT[b];
|
||||||
|
return RGBW32(r, g, b, w);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
uint32_t __attribute__((pure)) gamma32(uint32_t);
|
||||||
|
#endif
|
||||||
|
|
||||||
#define gamma32inv(c) unGamma24(c) // WLEDMM alias for upstream compatibility
|
#define gamma32inv(c) unGamma24(c) // WLEDMM alias for upstream compatibility
|
||||||
#define gamma8inv(c) fast_unGamma8(c) // WLEDMM alias for upstream compatibility
|
#define gamma8inv(c) fast_unGamma8(c) // WLEDMM alias for upstream compatibility
|
||||||
|
|
||||||
@@ -489,13 +516,48 @@ void clearEEPROM();
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
//wled_math.cpp
|
//wled_math.cpp
|
||||||
|
void init_math();
|
||||||
|
|
||||||
|
// WLEDMM: math functions inlined for speed
|
||||||
|
|
||||||
|
// 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x))
|
||||||
|
// input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767)
|
||||||
|
// optimized integer implementation by @dedehai
|
||||||
|
inline int16_t sin16_t(uint16_t theta) {
|
||||||
|
int scale = 1;
|
||||||
|
if (theta > 0x7FFF) {
|
||||||
|
theta = 0xFFFF - theta;
|
||||||
|
scale = -1; // second half of the sine function is negative (pi - 2*pi)
|
||||||
|
}
|
||||||
|
uint32_t precal = theta * (0x7FFF - theta);
|
||||||
|
uint64_t numerator = (uint64_t)precal * (4 * 0x7FFF); // 64bit required
|
||||||
|
int32_t denominator = 1342095361 - precal; // 1342095361 is 5 * 0x7FFF^2 / 4
|
||||||
|
int16_t result = numerator / denominator;
|
||||||
|
return result * scale;
|
||||||
|
}
|
||||||
|
inline int16_t cos16_t(uint16_t theta) {
|
||||||
|
return sin16_t(theta + 0x4000); //cos(x) = sin(x+pi/2)
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(ARDUINO_ARCH_ESP32)
|
||||||
|
// WLEDMM: use pre-calculated lookup-table for sin8_t
|
||||||
|
extern uint8_t sinT[256]; // wled_math.cpp
|
||||||
|
inline uint8_t sin8_t(uint8_t theta) { return sinT[theta];}
|
||||||
|
#else
|
||||||
|
// no LUT on 8266, to save 256 bytes of RAM
|
||||||
|
inline uint8_t sin8_t(uint8_t theta) {
|
||||||
|
int32_t sin16 = sin16_t((uint16_t)theta * 257); // 255 * 257 = 0xFFFF
|
||||||
|
sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding
|
||||||
|
return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
inline uint8_t cos8_t(uint8_t theta) {
|
||||||
|
return sin8_t(theta + 64); //cos(x) = sin(x+pi/2)
|
||||||
|
}
|
||||||
|
|
||||||
//float cos_t(float phi); // use float math
|
//float cos_t(float phi); // use float math
|
||||||
//float sin_t(float phi);
|
//float sin_t(float phi);
|
||||||
//float tan_t(float x);
|
//float tan_t(float x);
|
||||||
int16_t sin16_t(uint16_t theta);
|
|
||||||
int16_t cos16_t(uint16_t theta);
|
|
||||||
uint8_t sin8_t(uint8_t theta);
|
|
||||||
uint8_t cos8_t(uint8_t theta);
|
|
||||||
|
|
||||||
float sin_approx(float theta); // uses integer math (converted to float), accuracy +/-0.0015 (compared to sinf())
|
float sin_approx(float theta); // uses integer math (converted to float), accuracy +/-0.0015 (compared to sinf())
|
||||||
float cos_approx(float theta);
|
float cos_approx(float theta);
|
||||||
|
|||||||
@@ -843,7 +843,7 @@ static inline int32_t lerpPerlin(int32_t a, int32_t b, int32_t t) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 1D Perlin noise function that returns a value in range of -24691 to 24689
|
// 1D Perlin noise function that returns a value in range of -24691 to 24689
|
||||||
int32_t perlin1D_raw(uint32_t x, bool is16bit) {
|
int32_t IRAM_ATTR_YN perlin1D_raw(uint32_t x, bool is16bit) {
|
||||||
// integer and fractional part coordinates
|
// integer and fractional part coordinates
|
||||||
int32_t x0 = x >> 16;
|
int32_t x0 = x >> 16;
|
||||||
int32_t x1 = x0 + 1;
|
int32_t x1 = x0 + 1;
|
||||||
@@ -861,7 +861,7 @@ int32_t perlin1D_raw(uint32_t x, bool is16bit) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 2D Perlin noise function that returns a value in range of -20633 to 20629
|
// 2D Perlin noise function that returns a value in range of -20633 to 20629
|
||||||
int32_t perlin2D_raw(uint32_t x, uint32_t y, bool is16bit) {
|
int32_t IRAM_ATTR_YN perlin2D_raw(uint32_t x, uint32_t y, bool is16bit) {
|
||||||
int32_t x0 = x >> 16;
|
int32_t x0 = x >> 16;
|
||||||
int32_t y0 = y >> 16;
|
int32_t y0 = y >> 16;
|
||||||
int32_t x1 = x0 + 1;
|
int32_t x1 = x0 + 1;
|
||||||
@@ -893,7 +893,7 @@ int32_t perlin2D_raw(uint32_t x, uint32_t y, bool is16bit) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 3D Perlin noise function that returns a value in range of -16788 to 16381
|
// 3D Perlin noise function that returns a value in range of -16788 to 16381
|
||||||
int32_t perlin3D_raw(uint32_t x, uint32_t y, uint32_t z, bool is16bit) {
|
int32_t IRAM_ATTR_YN perlin3D_raw(uint32_t x, uint32_t y, uint32_t z, bool is16bit) {
|
||||||
int32_t x0 = x >> 16;
|
int32_t x0 = x >> 16;
|
||||||
int32_t y0 = y >> 16;
|
int32_t y0 = y >> 16;
|
||||||
int32_t z0 = z >> 16;
|
int32_t z0 = z >> 16;
|
||||||
|
|||||||
@@ -474,6 +474,8 @@ void WLED::setup()
|
|||||||
if (!Serial) delay(300); // just a tiny wait to avoid problems later when acessing serial
|
if (!Serial) delay(300); // just a tiny wait to avoid problems later when acessing serial
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
init_math(); // WLEDMM: pre-calculate some lookup tables
|
||||||
|
|
||||||
#ifdef ARDUINO_ARCH_ESP32
|
#ifdef ARDUINO_ARCH_ESP32
|
||||||
#if defined(WLED_DEBUG) && (defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3) || ARDUINO_USB_CDC_ON_BOOT)
|
#if defined(WLED_DEBUG) && (defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3) || ARDUINO_USB_CDC_ON_BOOT)
|
||||||
if (!Serial) delay(2500); // WLEDMM allow CDC USB serial to initialise (WLED_DEBUG only)
|
if (!Serial) delay(2500); // WLEDMM allow CDC USB serial to initialise (WLED_DEBUG only)
|
||||||
|
|||||||
@@ -59,10 +59,18 @@ float tan_t(float x) {
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// WLEDMM: sin16_t() moved to fcn_declare.h (inlining for speed)
|
||||||
|
|
||||||
|
// WLEDMM: cos16_t() moved to fcn_declare.h (inlining for speed)
|
||||||
|
|
||||||
|
// WLEDMM: sin8_t() moved to fcn_declare.h (inlining for speed)
|
||||||
|
|
||||||
|
// WLEDMM: cos8_t() moved to fcn_declare.h (inlining for speed)
|
||||||
|
|
||||||
// 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x))
|
// 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x))
|
||||||
// input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767)
|
// input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767)
|
||||||
// optimized integer implementation by @dedehai
|
// optimized integer implementation by @dedehai
|
||||||
int16_t sin16_t(uint16_t theta) {
|
static int16_t sin16_calc(uint16_t theta) {
|
||||||
int scale = 1;
|
int scale = 1;
|
||||||
if (theta > 0x7FFF) {
|
if (theta > 0x7FFF) {
|
||||||
theta = 0xFFFF - theta;
|
theta = 0xFFFF - theta;
|
||||||
@@ -75,30 +83,34 @@ int16_t sin16_t(uint16_t theta) {
|
|||||||
return result * scale;
|
return result * scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
int16_t cos16_t(uint16_t theta) {
|
#if defined(ARDUINO_ARCH_ESP32)
|
||||||
return sin16_t(theta + 0x4000); //cos(x) = sin(x+pi/2)
|
static uint8_t sin8_calc(uint8_t theta) {
|
||||||
}
|
int32_t sin16 = sin16_calc((uint16_t)theta * 257); // 255 * 257 = 0xFFFF
|
||||||
|
|
||||||
uint8_t sin8_t(uint8_t theta) {
|
|
||||||
int32_t sin16 = sin16_t((uint16_t)theta * 257); // 255 * 257 = 0xFFFF
|
|
||||||
sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding
|
sin16 += 0x7FFF + 128; //shift result to range 0-0xFFFF, +128 for rounding
|
||||||
return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow
|
return min(sin16, int32_t(0xFFFF)) >> 8; // min performs saturation, and prevents overflow
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t cos8_t(uint8_t theta) {
|
// WLEDMM: pre-calculate lookup-table for sin8_t
|
||||||
return sin8_t(theta + 64); //cos(x) = sin(x+pi/2)
|
uint8_t DRAM_ATTR sinT[256];
|
||||||
|
void init_math(void) {
|
||||||
|
for (unsigned i = 0; i < 256; i++)
|
||||||
|
sinT[i] = sin8_calc(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
void init_math(void) { return;} // dummy for 8266
|
||||||
|
#endif
|
||||||
|
|
||||||
float sin_approx(float theta) {
|
float sin_approx(float theta) {
|
||||||
uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3)
|
uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3)
|
||||||
int32_t result = sin16_t(scaled_theta);
|
int32_t result = sin16_calc(scaled_theta);
|
||||||
float sin = float(result) / 0x7FFF;
|
float sin = float(result) / 0x7FFF;
|
||||||
return sin;
|
return sin;
|
||||||
}
|
}
|
||||||
|
|
||||||
float cos_approx(float theta) {
|
float cos_approx(float theta) {
|
||||||
uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3)
|
uint16_t scaled_theta = (int)(theta * (float)(0xFFFF / M_TWOPI)); // note: do not cast negative float to uint! cast to int first (undefined on C3)
|
||||||
int32_t result = sin16_t(scaled_theta + 0x4000);
|
int32_t result = sin16_calc(scaled_theta + 0x4000);
|
||||||
float cos = float(result) / 0x7FFF;
|
float cos = float(result) / 0x7FFF;
|
||||||
return cos;
|
return cos;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user