Merge pull request #269 from MoonModules/bus_sPC_gPC_optimizations

Bus level setPixelColor and getPixelColor optimizations
This commit is contained in:
Frank
2025-11-01 13:39:25 +01:00
committed by GitHub
4 changed files with 111 additions and 74 deletions

View File

@@ -79,6 +79,24 @@ uint8_t realtimeBroadcast(uint8_t type, IPAddress client, uint16_t length, byte
#include "wled.h"
#endif
// WLEDMM moved here (from colors.cpp) for better optimization
static inline uint32_t __attribute__((hot)) colorBalanceFromKelvin(uint16_t kelvin, uint32_t rgb) // WLEDMM: IRAM_ATTR removed, inline for speed
{
//remember so that slow colorKtoRGB() doesn't have to run for every setPixelColor()
static byte correctionRGB[4] = {255,255,255,0}; // default to neutral
static uint16_t lastKelvin = 0;
if (lastKelvin != kelvin) {
colorKtoRGB(kelvin, correctionRGB); // convert Kelvin to RGB (slow)
lastKelvin = kelvin;
}
byte rgbw[4];
rgbw[0] = ((uint_fast16_t) correctionRGB[0] * R(rgb)) /255; // correct R //WLEDMM changed to fast type
rgbw[1] = ((uint_fast16_t) correctionRGB[1] * G(rgb)) /255; // correct G
rgbw[2] = ((uint_fast16_t) correctionRGB[2] * B(rgb)) /255; // correct B
rgbw[3] = W(rgb);
return RGBW32(rgbw[0],rgbw[1],rgbw[2],rgbw[3]);
}
void ColorOrderMap::add(uint16_t start, uint16_t len, uint8_t colorOrder) {
if (_count >= WLED_MAX_COLOR_ORDER_MAPPINGS) {
@@ -96,31 +114,40 @@ void ColorOrderMap::add(uint16_t start, uint16_t len, uint8_t colorOrder) {
_count++;
}
uint8_t IRAM_ATTR ColorOrderMap::getPixelColorOrder(uint16_t pix, uint8_t defaultColorOrder) const {
uint8_t __attribute__((hot)) ColorOrderMap::getPixelColorOrder(uint16_t pix, uint8_t defaultColorOrder) const {
if (_count == 0) return defaultColorOrder;
// upper nibble contains W swap information
uint8_t swapW = defaultColorOrder >> 4;
for (uint8_t i = 0; i < _count; i++) {
if (pix >= _mappings[i].start && pix < (_mappings[i].start + _mappings[i].len)) {
return _mappings[i].colorOrder | (swapW << 4);
// upper nibble contains W swap information // WLEDMM optimization: avoid shifting >>4 and later undo by <<4
uint8_t swapW = defaultColorOrder & 0xF0;
// Scan mappings, using unsigned range test: pix in [start, start+len)
for (uint_fast8_t i = 0, n = _count; i < n; i++) { // WLEDMM small speedup, by avoiding repeated class member access
const auto &m = _mappings[i]; // WLEDMM help the compiler to optimize
if ((uint16_t)(pix - m.start) < m.len) { // True iff m.len > 0 and pix >= m.start and pix < m.start + m.len
return (m.colorOrder & 0x0F) | swapW; // add W swap information
}
}
return defaultColorOrder;
}
uint32_t Bus::autoWhiteCalc(uint32_t c) const {
uint8_t aWM = _autoWhiteMode;
if (_gAWM != AW_GLOBAL_DISABLED) aWM = _gAWM;
uint32_t __attribute__((hot)) Bus::autoWhiteCalc(uint32_t c) const {
uint8_t aWM = (_gAWM != AW_GLOBAL_DISABLED) ? _gAWM : _autoWhiteMode;
if (aWM == RGBW_MODE_MANUAL_ONLY) return c;
uint8_t w = W(c);
uint_fast8_t w = W(c);
//ignore auto-white calculation if w>0 and mode DUAL (DUAL behaves as BRIGHTER if w==0)
if (w > 0 && aWM == RGBW_MODE_DUAL) return c;
uint8_t r = R(c);
uint8_t g = G(c);
uint8_t b = B(c);
if (aWM == RGBW_MODE_MAX) return RGBW32(r, g, b, r > g ? (r > b ? r : b) : (g > b ? g : b)); // brightest RGB channel
w = r < g ? (r < b ? r : b) : (g < b ? g : b);
uint_fast8_t r = R(c);
uint_fast8_t g = G(c);
uint_fast8_t b = B(c);
// brightest RGB channel
if (aWM == RGBW_MODE_MAX) { // WLEDMM use max() instead of several nested conditions
w = max(r, g);
w = max(w, b);
return RGBW32(r, g, b, w);
}
// Other modes: smallest RGB channel // WLEDMM use min() instead of several nested conditions
w = min(r, g);
w = min(w, b);
if (aWM == RGBW_MODE_AUTO_ACCURATE) { r -= w; g -= w; b -= w; } //subtract w in ACCURATE mode
return RGBW32(r, g, b, w);
}
@@ -1214,7 +1241,7 @@ uint32_t BusManager::memUsage(BusConfig &bc) {
int BusManager::add(BusConfig &bc) {
if (getNumBusses() - getNumVirtualBusses() >= WLED_MAX_BUSSES) return -1;
// WLEDMM clear cached Bus info first
lastend = 0;
lastlen = 0;
laststart = 0;
lastBus = nullptr;
slowMode = false;
@@ -1255,7 +1282,7 @@ void BusManager::removeAll() {
// WLEDMM clear cached Bus info
lastBus = nullptr;
laststart = 0;
lastend = 0;
lastlen = 0;
slowMode = false;
}
@@ -1276,24 +1303,28 @@ void BusManager::setStatusPixel(uint32_t c) {
}
}
void IRAM_ATTR __attribute__((hot)) BusManager::setPixelColor(uint16_t pix, uint32_t c, int16_t cct) {
if (!slowMode && (pix >= laststart) && (pix < lastend ) && lastBus->isOk()) {
// WLEDMM same bus as last time - no need to search again
void IRAM_ATTR __attribute__((hot)) BusManager::setPixelColor(uint16_t pix, uint32_t c) {
// Fast path: check cached bus first (with proper nullptr check)
// optimization: below is True iff lastlen > 0 and pix >= laststart and pix < laststart + lastlen
if (!slowMode && lastBus && ((uint_fast16_t)(pix - laststart) < lastlen) && lastBus->isOk()) { // WLEDMM saves us a few cycles for each pixel
lastBus->setPixelColor(pix - laststart, c);
return;
}
for (uint_fast8_t i = 0; i < numBusses; i++) { // WLEDMM use fast native types
Bus* b = busses[i];
if (b->isOk() == false) continue; // WLEDMM ignore invalid (=not ready) busses
// Slow path: search through all buses
uint_fast8_t count = numBusses; // Cache to avoid repeated member access
for (uint_fast8_t i = 0; i < count; i++) {
Bus* const b = busses[i]; // Use const pointer for optimization hint
if ((!b) || (b->isOk() == false)) continue; // WLEDMM ignore invalid (=not ready) busses
uint_fast16_t bstart = b->getStart();
if (pix < bstart || pix >= bstart + b->getLength()) continue;
else {
uint_fast16_t blen = b->getLength();
if ((uint_fast16_t)(pix - bstart) < blen) { // Unsigned arithmetic trick for fast range check
if (!slowMode) {
// WLEDMM remember last Bus we took
// Cache bus info for next call
lastBus = b;
laststart = bstart;
lastend = bstart + b->getLength();
lastlen = blen;
}
b->setPixelColor(pix - bstart, c);
if (!slowMode) break; // WLEDMM found the right Bus -> so we can stop searching - unless we have busses that overlap
@@ -1317,47 +1348,53 @@ void __attribute__((cold)) BusManager::setSegmentCCT(int16_t cct, bool allowWBCo
}
// Return the color of global pixel index `pix`, read from the first ok bus whose range
// [start, start+length) contains it. Returns 0 when no such bus exists.
// The matching bus, its start and its length are remembered in lastBus / laststart /
// lastlen so that following calls for pixels on the same bus skip the search.
uint32_t IRAM_ATTR __attribute__((hot)) BusManager::getPixelColor(uint_fast16_t pix) { // WLEDMM use fast native types, IRAM_ATTR
  // Fast path: check cached bus first (with proper null check, and unsigned arithmetic trick for faster range check).
  // If pix < laststart the unsigned difference wraps to a large value and fails the "< lastlen" test.
  if (lastBus && ((uint_fast16_t)(pix - laststart) < lastlen) && lastBus->isOk()) {
    // WLEDMM same bus as last time - no need to search again
    return lastBus->getPixelColor(pix - laststart);
  }

  // Slow path: search through all buses
  uint_fast8_t count = numBusses; // Cache to avoid repeated member access
  for (uint_fast8_t i = 0; i < count; i++) {
    Bus* const b = busses[i]; // Use const pointer for optimization hint
    if ((!b) || (b->isOk() == false)) continue; // WLEDMM ignore invalid (=not ready) busses
    uint_fast16_t bstart = b->getStart();
    uint_fast16_t blen = b->getLength();
    if ((uint_fast16_t)(pix - bstart) < blen) { // Unsigned arithmetic trick for fast range check
      // Cache bus info for next call. The cache is refreshed unconditionally here,
      // i.e. without looking at slowMode.
      lastBus = b;
      laststart = bstart;
      lastlen = blen;
      return b->getPixelColor(pix - bstart); // done - found one
    }
  }
  return 0;
}
uint32_t IRAM_ATTR __attribute__((hot)) BusManager::getPixelColorRestored(uint_fast16_t pix) { // WLEDMM uses bus::getPixelColorRestored()
if ((pix >= laststart) && (pix < lastend ) && (lastBus != nullptr) && lastBus->isOk()) {
// Fast path: check cached bus first (with proper null check, and unsigned arithmetic trick for faster range check)
if (lastBus && ((uint_fast16_t)(pix - laststart) < lastlen) && lastBus->isOk()) {
// WLEDMM same bus as last time - no need to search again
return lastBus->getPixelColorRestored(pix - laststart);
}
for (uint_fast8_t i = 0; i < numBusses; i++) {
Bus* b = busses[i];
if (b->isOk() == false) continue; // WLEDMM ignore invalid (=not ready) busses
uint_fast8_t count = numBusses; // Cache to avoid repeated member access
for (uint_fast8_t i = 0; i < count; i++) {
Bus* const b = busses[i]; // Use const pointer for optimization hint
if ((!b) || (b->isOk() == false)) continue; // WLEDMM ignore invalid (=not ready) busses
uint_fast16_t bstart = b->getStart();
if (pix < bstart || pix >= bstart + b->getLength()) continue;
else {
if (!slowMode) {
// WLEDMM remember last Bus we took
uint_fast16_t blen = b->getLength();
if ((uint_fast16_t)(pix - bstart) < blen) { // Unsigned arithmetic trick for range check
//if (!slowMode) {
// Cache bus info for next call
lastBus = b;
laststart = bstart;
lastend = bstart + b->getLength();
}
lastlen = blen;
//}
return b->getPixelColorRestored(pix - bstart);
}
}

View File

@@ -175,7 +175,7 @@ class Bus {
_type == TYPE_ANALOG_2CH || _type == TYPE_ANALOG_5CH) return true;
return false;
}
static void setCCT(uint16_t cct) {
static void setCCT(int16_t cct) { // WLEDMM bugfix: parameter must be signed, otherwise "-1" becomes 65535 --> undefined behaviour on RISC-V
_cct = cct;
}
static void setCCTBlend(uint8_t b) {
@@ -463,13 +463,14 @@ class BusManager {
// WLEDMM clear cached Bus info
lastBus = nullptr;
laststart = 0;
lastend = 0;
lastlen= 0;
slowMode = isRTMode;
}
void setStatusPixel(uint32_t c);
void setPixelColor(uint16_t pix, uint32_t c, int16_t cct=-1);
void setPixelColor(uint16_t pix, uint32_t c); // WLEDMM third parameter "cct" is never used - removed
//void setPixelColor(uint16_t pix, uint32_t c, int16_t cct) {Bus::setCCT(cct); setPixelColor(pix, c);}; // WLEDMM legacy support - slow, avoid using it
void setBrightness(uint8_t b, bool immediate=false); // immediate=true is for use in ABL, it applies brightness immediately (warning: inefficient)
@@ -504,7 +505,7 @@ class BusManager {
// WLEDMM cache last used Bus -> 20% to 30% speedup when using many LED pins
Bus *lastBus = nullptr;
unsigned laststart = 0;
unsigned lastend = 0;
unsigned lastlen = 0;
bool slowMode = false; // WLEDMM not sure why we need this. But its necessary.
inline uint8_t getNumVirtualBusses() const {

View File

@@ -135,25 +135,37 @@ void colorHStoRGB(uint16_t hue, byte sat, byte* rgb) //hue, sat to rgb
//get RGB values from color temperature in K (https://tannerhelland.com/2012/09/18/convert-temperature-rgb-algorithm-code.html)
// kelvin: color temperature; values below 1200 or above 65000 yield neutral white (255,255,255).
// rgb:    output buffer with room for 4 bytes; rgb[0..2] receive R, G, B and rgb[3] is always set to 0.
void colorKtoRGB(uint16_t kelvin, byte* rgb) //white spectrum to rgb, calc
{
  // WLEDMM safe exit (do nothing) to avoid logf domain errors. argument to logf must be >= 1.0f to avoid bad result 0 or -inf;
  // kelvin >65k might be a signed/unsigned conversion error
  if ((kelvin < 1200) || (kelvin > 65000)) {
    rgb[0] = 255;
    rgb[1] = 255;
    rgb[2] = 255;
    rgb[3] = 0;
    return;
  }

  int r = 0, g = 0, b = 0;
  float temp = float(kelvin) / 100.0f; // WLEDMM "float()" added - to make sure it's done in float, not in double or int
  if (temp <= 66.0f) {
    r = 255;
    g = roundf(99.4708025861f * logf(temp) - 161.1195681661f); // temp >= 12 here because of the guard above
    if (temp <= 19.0f) {
      b = 0;
    } else {
      b = roundf(138.5177312231f * logf((temp - 10.0f)) - 305.0447927307f); // safe because temp > 19.0f
    }
  } else {
    // temp-60.0f is always > 0 here (since temp>66)
    r = roundf(329.698727446f * powf((temp - 60.0f), -0.1332047592f));
    g = roundf(288.1221695283f * powf((temp - 60.0f), -0.0755148492f));
    b = 255;
  }
  //g += 12; //mod by Aircoookie, a bit less accurate but visibly less pinkish
  // Clamp each channel to 0..255 before narrowing to a byte.
  // WLEDMM min(max()) is faster than constrain()
  rgb[0] = (uint8_t) min(max(r, 0), 255);
  rgb[1] = (uint8_t) min(max(g, 0), 255);
  rgb[2] = (uint8_t) min(max(b, 0), 255);
  rgb[3] = 0;
}
@@ -301,22 +313,9 @@ static float maxf (float v, float w) // WLEDMM better use standard library fmax
}
#endif
// adjust RGB values based on color temperature in K (range [2800-10200]) (https://en.wikipedia.org/wiki/Color_balance)
// called from bus manager when color correction is enabled!
// Returns rgb with R, G and B scaled by the correction for `kelvin`; the W channel is passed through unchanged.
uint32_t __attribute__((hot)) IRAM_ATTR_YN colorBalanceFromKelvin(uint16_t kelvin, uint32_t rgb) // WLEDMM: IRAM_ATTR_YN
{
  //remember so that slow colorKtoRGB() doesn't have to run for every setPixelColor()
  // The cache starts out neutral (no correction): with an all-zero default, a first call with
  // kelvin == 0 would match lastKelvin, skip the conversion and scale every color to black.
  static byte correctionRGB[4] = {255,255,255,0};
  static uint16_t lastKelvin = 0;
  if (lastKelvin != kelvin) {
    colorKtoRGB(kelvin, correctionRGB); // convert Kelvin to RGB
    lastKelvin = kelvin;                // only touch the cache key when the cache was refreshed
  }
  byte rgbw[4];
  rgbw[0] = ((uint16_t) correctionRGB[0] * R(rgb)) /255; // correct R
  rgbw[1] = ((uint16_t) correctionRGB[1] * G(rgb)) /255; // correct G
  rgbw[2] = ((uint16_t) correctionRGB[2] * B(rgb)) /255; // correct B
  rgbw[3] = W(rgb);                                      // white is not color-corrected
  return RGBW32(rgbw[0],rgbw[1],rgbw[2],rgbw[3]);
}
// WLEDMM colorBalanceFromKelvin moved into bus_manager.cpp for better optimization
//approximates a Kelvin color temperature from an RGB color.
//this does no check for the "whiteness" of the color,

View File

@@ -66,7 +66,7 @@ void colorXYtoRGB(float x, float y, byte* rgb); // only defined if huesync disab
void colorRGBtoXY(byte* rgb, float* xy); // only defined if huesync disabled TODO
void colorFromDecOrHexString(byte* rgb, char* in);
bool colorFromHexString(byte* rgb, const char* in);
uint32_t colorBalanceFromKelvin(uint16_t kelvin, uint32_t rgb);
//uint32_t colorBalanceFromKelvin(uint16_t kelvin, uint32_t rgb); // WLEDMM function moved into bus_manager.cpp for better optimization
uint16_t __attribute__((const)) approximateKelvinFromRGB(uint32_t rgb); // WLEDMM: added attribute const
void setRandomColor(byte* rgb);
uint8_t gamma8_cal(uint8_t b, float gamma);