hot path optimization: make gamma correction inline
* make sure that the gamma LUT is always initialized
* remove some unnecessary safety checks
* make gamma8() inline, for more speed
* use fast unGamma8 for preview
* add super-fast unGamma8 for HUB75 - the old function lost 3-10 fps, this version does not reduce fps at all *grins*
This commit is contained in:
@@ -610,6 +610,20 @@ uint8_t BusHub75Matrix::activeType = 0;
|
||||
uint8_t BusHub75Matrix::instanceCount = 0;
|
||||
uint8_t BusHub75Matrix::last_bri = 0;
|
||||
|
||||
#ifndef NO_CIE1931
|
||||
|
||||
// WLEDMM speedup: create a version of "unGamma8" that can be inlined by the compiler
|
||||
extern uint8_t gammaTinv[256]; // defined in colors.cpp
|
||||
static uint8_t const* myGammaTable = gammaTinv; // local alias for gammaTinv
|
||||
|
||||
// Returns the myGammaTable entry for `value` (myGammaTable is the file-local alias of gammaTinv).
// Pure table lookup: no range check and no check that the table has been filled.
static inline uint8_t unGamma8_bus(uint8_t value) {
|
||||
return myGammaTable[value];
|
||||
}
|
||||
// Applies the myGammaTable lookup to the R, G and B components of `c` and
// re-packs them with RGBW32(); the W component is passed through unchanged.
static inline uint32_t unGamma24_bus(uint32_t c) {
|
||||
return RGBW32(myGammaTable[R(c)], myGammaTable[G(c)], myGammaTable[B(c)], W(c));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// --------------------------
|
||||
// Bitdepth reduction based on panel size
|
||||
@@ -1080,6 +1094,13 @@ BusHub75Matrix::BusHub75Matrix(BusConfig &bc) : Bus(bc.type, bc.start, bc.autoWh
|
||||
activeFourScanPanel = fourScanPanel;
|
||||
if (newDisplay) memcpy(&activeMXconfig, &mxconfig, sizeof(mxconfig));
|
||||
}
|
||||
|
||||
#ifndef NO_CIE1931
|
||||
// force initial calculation of gamma correction tables
|
||||
if ((gammaCorrectVal < 0.999f) || (gammaCorrectVal > 3.0f)) calcGammaTable(1.0f);
|
||||
else calcGammaTable(gammaCorrectVal);
|
||||
#endif
|
||||
|
||||
instanceCount++;
|
||||
USER_PRINT(F("heap usage: ")); USER_PRINTLN(int(lastHeap - ESP.getFreeHeap()));
|
||||
}
|
||||
@@ -1142,13 +1163,12 @@ void __attribute__((hot)) IRAM_ATTR BusHub75Matrix::show(void) {
|
||||
for (int y=0; y<height; y++) for (int x=0; x<width; x++) {
|
||||
if (getBitFromArray(ledsDirty, pix) == true) { // only repaint the "dirty" pixels
|
||||
#ifndef NO_CIE1931
|
||||
uint32_t c = uint32_t(ledBuffer[pix]) & 0x00FFFFFF; // get RGB color, removing FastLED "alpha" component
|
||||
c = unGamma24(c); // to use the driver linear brightness feature, we first need to undo WLED gamma correction
|
||||
uint8_t r = R(c);
|
||||
uint8_t g = G(c);
|
||||
uint8_t b = B(c);
|
||||
const CRGB& c = ledBuffer[pix]; // c is an alias for ledBuffer[pix] - avoid creation of a temporary CRGB object instance
|
||||
uint8_t r = unGamma8_bus(c.r);
|
||||
uint8_t g = unGamma8_bus(c.g);
|
||||
uint8_t b = unGamma8_bus(c.b);
|
||||
#else
|
||||
const CRGB c = ledBuffer[pix]; // we stay on CRGB, instead of packing/unpacking the color value to uint32_t
|
||||
const CRGB& c = ledBuffer[pix]; // we stay on CRGB, instead of packing/unpacking the color value to uint32_t
|
||||
uint8_t r = c.r;
|
||||
uint8_t g = c.g;
|
||||
uint8_t b = c.b;
|
||||
|
||||
@@ -392,7 +392,7 @@ uint16_t approximateKelvinFromRGB(uint32_t rgb) {
|
||||
|
||||
#if !defined(WLED_USE_CIE_BRIGHTNESS_TABLE)
|
||||
//gamma 2.8 lookup table used for color correction
|
||||
static byte gammaT[256] = {
|
||||
byte DRAM_ATTR_YN gammaT[256] = { // WLEDMM: DRAM_ATTR to ensure that this table is in RAM (faster)
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
|
||||
@@ -415,7 +415,7 @@ static byte gammaT[256] = {
|
||||
// https://github.com/Aircoookie/WLED/issues/2767#issuecomment-1310961308
|
||||
// unfortunately NeoPixelBus has its own internal table, that kills low brightness values similar to the original WLED table.
|
||||
// see https://github.com/Makuna/NeoPixelBus/blob/master/src/internal/NeoGamma.h
|
||||
static const byte gammaT[256] = {
|
||||
const DRAM_ATTR_YN byte gammaT[256] = { // WLEDMM make sure this table is in RAM (faster)
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4,
|
||||
4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
|
||||
@@ -436,7 +436,7 @@ static const byte gammaT[256] = {
|
||||
#endif
|
||||
|
||||
// WLEDMM begin
|
||||
static uint8_t gammaTinv[256] = { 0 };
|
||||
uint8_t DRAM_ATTR_YN gammaTinv[256] = { 0 };
|
||||
static void calcInvGammaTable(float gamma)
|
||||
{
|
||||
float gammaInv = 1.0f / 2.4f; // surprise surprise: WLED palettes use a fixed gamma of 2.4 !!!
|
||||
@@ -448,9 +448,8 @@ static void calcInvGammaTable(float gamma)
|
||||
gammaTinv[255]=255;
|
||||
}
|
||||
IRAM_ATTR_YN uint8_t __attribute__((hot)) unGamma8(uint8_t value) {
|
||||
//if (!gammaCorrectCol || (value == 0) || (value == 255)) return value;
|
||||
if ((gammaCorrectVal < 0.999f) || (gammaCorrectVal > 3.0f)) return value;
|
||||
if (gammaTinv[255] == 0) calcInvGammaTable(gammaCorrectVal);
|
||||
//if ((gammaCorrectVal < 0.999f) || (gammaCorrectVal > 3.0f)) return value; // WLEDMM yes, looks stupid
|
||||
return gammaTinv[value];
|
||||
}
|
||||
|
||||
@@ -482,13 +481,13 @@ void calcGammaTable(float gamma)
|
||||
}
|
||||
|
||||
// used for individual channel or brightness gamma correction
|
||||
IRAM_ATTR_YN __attribute__((hot)) uint8_t gamma8(uint8_t b) // WLEDMM added IRAM_ATTR_YN
|
||||
IRAM_ATTR_YN __attribute__((hot)) uint8_t gamma8_slow(uint8_t b) // WLEDMM added IRAM_ATTR_YN
|
||||
{
|
||||
return gammaT[b];
|
||||
}
|
||||
|
||||
// used for color gamma correction
|
||||
uint32_t __attribute__((hot)) gamma32(uint32_t color)
|
||||
IRAM_ATTR_YN uint32_t __attribute__((hot)) gamma32(uint32_t color)
|
||||
{
|
||||
if (!gammaCorrectCol) return color;
|
||||
uint8_t w = W(color);
|
||||
|
||||
@@ -594,8 +594,10 @@
|
||||
// error only in MM, not in upstream... tbd: find out why
|
||||
// IRAM_ATTR_YN / DRAM_ATTR_YN: placement attributes that are only applied on ESP32 builds.
#ifdef ARDUINO_ARCH_ESP32
|
||||
// ESP32: forward to the platform's IRAM_ATTR / DRAM_ATTR attributes
#define IRAM_ATTR_YN IRAM_ATTR
|
||||
#define DRAM_ATTR_YN DRAM_ATTR
|
||||
#else
|
||||
// other architectures: both macros expand to nothing
#define IRAM_ATTR_YN
|
||||
#define DRAM_ATTR_YN
|
||||
#endif
|
||||
|
||||
#define WLED_O2_ATTR __attribute__((optimize("O2")))
|
||||
|
||||
@@ -78,11 +78,17 @@ uint16_t __attribute__((const)) approximateKelvinFromRGB(uint32_t rgb);
|
||||
void setRandomColor(byte* rgb);
|
||||
uint8_t gamma8_cal(uint8_t b, float gamma);
|
||||
void calcGammaTable(float gamma);
|
||||
uint8_t __attribute__((pure)) gamma8(uint8_t b); // WLEDMM: added attribute pure
|
||||
uint8_t __attribute__((pure)) gamma8_slow(uint8_t b); // WLEDMM: added attribute pure
|
||||
uint32_t __attribute__((pure)) gamma32(uint32_t); // WLEDMM: added attribute pure
|
||||
uint8_t unGamma8(uint8_t value); // WLEDMM revert gamma correction
|
||||
uint32_t unGamma24(uint32_t c); // WLEDMM for 24bit color (white left as-is)
|
||||
|
||||
// WLEDMM: speedup - inline function for gamma correction
// Both helpers below are plain table lookups without any range or initialization checks.
|
||||
extern uint8_t gammaTinv[256]; // colors.cpp
|
||||
// NOTE(review): colors.cpp also has a `const` definition of gammaT (CIE brightness table variant) -
// confirm that this non-const extern declaration still compiles and links in that configuration.
extern uint8_t gammaT[256]; // colors.cpp
|
||||
// returns gammaT[value]
inline uint8_t gamma8(uint8_t value) { return gammaT[value];} // WLEDMM inlined for speed
|
||||
// returns gammaTinv[value]; performs no initialization of gammaTinv itself, so the table must already be filled
inline uint8_t fast_unGamma8(uint8_t value) { return gammaTinv[value];}
|
||||
|
||||
//dmx_output.cpp
|
||||
void initDMXOutput();
|
||||
void handleDMXOutput();
|
||||
|
||||
@@ -886,6 +886,11 @@ void WLED::setup()
|
||||
#endif
|
||||
|
||||
USER_PRINT(F("Free heap ")); USER_PRINTLN(ESP.getFreeHeap());USER_PRINTLN();
|
||||
|
||||
// WLEDMM force initial calculation of gamma correction LUT
|
||||
if ((gammaCorrectVal < 0.999f) || (gammaCorrectVal > 3.0f)) calcGammaTable(1.0f); // no gamma => create linear LUT
|
||||
else calcGammaTable(gammaCorrectVal);
|
||||
|
||||
USER_PRINTLN(F("WLED initialization done.\n"));
|
||||
delay(50);
|
||||
// repeat Ada prompt
|
||||
|
||||
@@ -253,6 +253,7 @@ static bool sendLiveLedsWs(uint32_t wsClient) // WLEDMM added "static"
|
||||
}
|
||||
#endif
|
||||
|
||||
(void) unGamma8(127); // WLEDMM dummy call, just to make sure that gammaTinv is initialized, so we can use fast_unGamma8
|
||||
uint8_t stripBrightness = strip.getBrightness();
|
||||
for (size_t i = 0; pos < bufSize -2; i += n)
|
||||
{
|
||||
@@ -268,9 +269,9 @@ static bool sendLiveLedsWs(uint32_t wsClient) // WLEDMM added "static"
|
||||
if (gammaCorrectPreview) {
|
||||
uint8_t w = W(c); // not sure why, but it looks better if using "white" without corrections
|
||||
if (w>0) c = color_add(c, RGBW32(w, w, w, 0), false); // add white channel to RGB channels - color_add() will prevent over-saturation
|
||||
buffer[pos++] = unGamma8(R(c)); //R
|
||||
buffer[pos++] = unGamma8(G(c)); //G
|
||||
buffer[pos++] = unGamma8(B(c)); //B
|
||||
buffer[pos++] = fast_unGamma8(R(c)); //R
|
||||
buffer[pos++] = fast_unGamma8(G(c)); //G
|
||||
buffer[pos++] = fast_unGamma8(B(c)); //B
|
||||
} else {
|
||||
// WLEDMM end
|
||||
uint8_t w = W(c); // WLEDMM small optimization
|
||||
|
||||
Reference in New Issue
Block a user