Segment drawing optimizations (FASTPATH only)

* cache width, height, and a few more values that are normally re-calculated for each pixel
* make normal sPC a bit faster
* Segment::fade_out() optimization

--> only active in FASTPATH mode, to save flash space on small devices (ESP8266)
--> code still needs some polishing
--> up to 20% speedup with some 2D effects (ESP32, flash QIO 80MHz, -O2)
This commit is contained in:
Frank
2024-08-05 21:30:25 +02:00
parent 078bd70775
commit 5240c3450b
4 changed files with 88 additions and 11 deletions

View File

@@ -434,6 +434,19 @@ typedef struct Segment {
static size_t _usedSegmentData; // WLEDMM uint16_t is too small
void setPixelColorXY_fast(int x, int y,uint32_t c, uint32_t scaled_col, int cols, int rows); // set relative pixel within segment with color - faster, but no error checking!!!
#ifdef WLEDMM_FASTPATH
// WLEDMM cache some values that won't change while drawing a frame
// All cached members below are (re)computed by startFrame(); the initializers are only the values before the first call.
bool _isSimpleSegment = false; // true when grouping == 1 and spacing == 0 - setPixelColorXY() then takes the fast path
bool _isValid2D = false; // isActive() && is2D() - when false, the fast path of setPixelColorXY() draws nothing
uint8_t _brightness = 255; // final pixel brightness - including transitions and segment opacity
uint16_t _2dWidth = 0; // virtualWidth (for non-2D segments startFrame() stores virtualLength() here)
uint16_t _2dHeight = 0; // virtualHeight
void setPixelColorXY_slow(int x, int y, uint32_t c); // set relative pixel within segment with color - full slow version
#else
// without FASTPATH there is no separate slow variant, so _slow simply forwards to the regular setPixelColorXY()
void setPixelColorXY_slow(int x, int y, uint32_t c) { setPixelColorXY(x,y,c); } // not FASTPATH - slow is the normal
#endif
// perhaps this should be per segment, not static
static CRGBPalette16 _currentPalette; // palette used for current effect (includes transition, used in color_from_palette())
@@ -587,6 +600,7 @@ typedef struct Segment {
bool allocateData(size_t len);
void deallocateData(void);
void resetIfRequired(void);
void startFrame(void); // cache a few values that don't change while an effect is drawing
/**
* Flags that before the next effect is calculated,
* the internal segment state should be reset.
@@ -642,6 +656,7 @@ typedef struct Segment {
uint32_t __attribute__((pure)) color_wheel(uint8_t pos);
// 2D matrix
#ifndef WLEDMM_FASTPATH
inline uint16_t virtualWidth() const { // WLEDMM use fast types, and make function inline
uint_fast16_t groupLen = groupLength();
uint_fast16_t vWidth = ((transpose ? height() : width()) + groupLen - 1) / groupLen;
@@ -654,6 +669,23 @@ typedef struct Segment {
if (mirror_y) vHeight = (vHeight + 1) /2; // divide by 2 if mirror, leave at least a single LED
return vHeight;
}
#else
// WLEDMM FASTPATH: nothing is computed here - this only hands out the pre-calculated (cached) virtualWidth
inline uint16_t virtualWidth() const {
  return _2dWidth;
}
// WLEDMM FASTPATH: nothing is computed here - this only hands out the pre-calculated (cached) virtualHeight
inline uint16_t virtualHeight() const {
  return _2dHeight;
}
uint16_t calc_virtualWidth() const {
uint_fast16_t groupLen = groupLength();
uint_fast16_t vWidth = ((transpose ? height() : width()) + groupLen - 1) / groupLen;
if (mirror) vWidth = (vWidth + 1) /2; // divide by 2 if mirror, leave at least a single LED
return vWidth;
}
uint16_t calc_virtualHeight() const {
uint_fast16_t groupLen = groupLength();
uint_fast16_t vHeight = ((transpose ? width() : height()) + groupLen - 1) / groupLen;
if (mirror_y) vHeight = (vHeight + 1) /2; // divide by 2 if mirror, leave at least a single LED
return vHeight;
}
#endif
uint16_t nrOfVStrips(void) const;
void createjMap(); //WLEDMM jMap
@@ -666,8 +698,24 @@ typedef struct Segment {
return (x%width) + (y%height) * width;
}
//void setPixelColorXY_fast(int x, int y,uint32_t c); // set relative pixel within segment with color - wrapper for _fast
#ifdef WLEDMM_FASTPATH
// WLEDMM "gateway" for setting a pixel relative to the segment:
// simple segments go to setPixelColorXY_fast(), everything else falls back to setPixelColorXY_slow().
// Uses the values cached in _isSimpleSegment, _isValid2D, _brightness, _2dWidth and _2dHeight.
inline void setPixelColorXY(int x, int y, uint32_t col) {
  if (!_isSimpleSegment) {
    // slow path
    setPixelColorXY_slow(x, y, col);
    return;
  }

  // fast path - some sanity checks first, because setPixelColorXY_fast() does no error checking
  if (!_isValid2D) return; // not active
  // out-of-range check; due to 2's complement, the unsigned compare also catches negative values
  if (unsigned(x) >= _2dWidth) return;
  if (unsigned(y) >= _2dHeight) return;
  if ((_brightness == 0) && !transitional) return; // black-out

  // calculate final color - full brightness needs no fading
  uint32_t scaled_col = col;
  if (_brightness != 255) scaled_col = color_fade(col, _brightness);

  setPixelColorXY_fast(x, y, col, scaled_col, int(_2dWidth), int(_2dHeight)); // call "fast" function
}
#else
void setPixelColorXY(int x, int y, uint32_t c); // set relative pixel within segment with color
#endif
// convenience overload for unsigned coordinates - converts both to int and forwards to the (int, int, uint32_t) version
inline void setPixelColorXY(unsigned x, unsigned y, uint32_t c) {
  setPixelColorXY(static_cast<int>(x), static_cast<int>(y), c);
}
inline void setPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0) { setPixelColorXY(x, y, RGBW32(r,g,b,w)); }
// convenience overload for CRGB colors - packs r/g/b with RGBW32() (white channel set to 0) and forwards
inline void setPixelColorXY(int x, int y, CRGB c) {
  const uint32_t packed = RGBW32(c.r,c.g,c.b,0);
  setPixelColorXY(x, y, packed);
}

View File

@@ -217,6 +217,19 @@ uint32_t WS2812FX::getPixelColorXY(uint16_t x, uint16_t y) {
#ifndef WLED_DISABLE_2D
// WLEDMM cache some values so we don't need to re-calc them for each pixel.
// Without WLEDMM_FASTPATH this function does nothing.
void Segment::startFrame(void) {
#ifdef WLEDMM_FASTPATH
  // the fast pixel path only draws into an active 2D segment
  _isValid2D = isActive() && is2D();

  // final brightness: a segment that is switched off counts as opacity 0
  const auto segmentOpacity = on ? opacity : 0;
  _brightness = currentBri(segmentOpacity);

  // we can handle pixels faster when no grouping or spacing is involved
  const bool ungrouped = (grouping == 1);
  const bool unspaced = (spacing == 0);
  _isSimpleSegment = ungrouped && unspaced;
  // if (reverse_y) _isSimpleSegment = false; // for A/B testing

  // cached dimensions: non-2D segments store their virtual length as the width
  if (is2D()) {
    _2dWidth = calc_virtualWidth();
  } else {
    _2dWidth = virtualLength();
  }
  _2dHeight = calc_virtualHeight();
#endif
}
// WLEDMM end
// XY(x,y) - gets pixel index within current segment (often used to reference leds[] array element)
// WLEDMM Segment::XY()is declared inline, see FX.h
@@ -266,7 +279,11 @@ void IRAM_ATTR Segment::setPixelColorXY_fast(int x, int y, uint32_t col, uint32_
// normal Segment::setPixelColorXY with error checking, and support for grouping / spacing
#ifdef WLEDMM_FASTPATH
void IRAM_ATTR_YN Segment::setPixelColorXY_slow(int x, int y, uint32_t col) //WLEDMM: IRAM_ATTR conditionally, renamed to "_slow"
#else
void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col) //WLEDMM: IRAM_ATTR conditionally
#endif
{
if (Segment::maxHeight==1) return; // not a matrix set-up
const int_fast16_t cols = virtualWidth(); // WLEDMM optimization
@@ -520,7 +537,7 @@ void Segment::box_blur(uint16_t i, bool vertical, fract8 blur_amount) {
for (int j = 0; j < dim1; j++) {
int x = vertical ? i : j;
int y = vertical ? j : i;
setPixelColorXY(x, y, out[j]);
if (in[j] != out[j]) setPixelColorXY(x, y, out[j]);
}
}
@@ -698,7 +715,7 @@ void Segment::drawLine(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint3
// single pixel (line length == 0)
if (dx+dy == 0) {
if (simpleSegment) setPixelColorXY_fast(x0, y0, c, scaled_col, cols, rows);
else setPixelColorXY(x0, y0, c);
else setPixelColorXY_slow(x0, y0, c);
return;
}
@@ -734,7 +751,7 @@ void Segment::drawLine(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint3
for (;;) {
// if (x0 >= cols || y0 >= rows) break; // WLEDMM we hit the edge - should never happen
if (simpleSegment) setPixelColorXY_fast(x0, y0, c, scaled_col, cols, rows);
else setPixelColorXY(x0, y0, c);
else setPixelColorXY_slow(x0, y0, c);
if (x0==x1 && y0==y1) break;
int e2 = err;
if (e2 >-dx) { err -= dy; x0 += sx; }

View File

@@ -266,6 +266,7 @@ void Segment::resetIfRequired() {
deallocateData();
next_time = 0; step = 0; call = 0; aux0 = 0; aux1 = 0;
reset = false; // setOption(SEG_OPTION_RESET, false);
startFrame(); // WLEDMM update cached propoerties
}
}
@@ -673,7 +674,11 @@ class JMapC {
if (size > 0)
return size;
else
#ifndef WLEDMM_FASTPATH
return SEGMENT.virtualWidth() * SEGMENT.virtualHeight(); //pixels
#else
return SEGMENT.calc_virtualWidth() * SEGMENT.calc_virtualHeight(); // calc pixel sizes
#endif
}
void setPixelColor(uint16_t i, uint32_t col) {
updatejMapDoc();
@@ -765,7 +770,11 @@ class JMapC {
jMapFile.close();
maxWidth++; maxHeight++;
#ifndef WLEDMM_FASTPATH
scale = min(SEGMENT.virtualWidth() / maxWidth, SEGMENT.virtualHeight() / maxHeight); // WLEDMM use native min/max
#else
scale = min(SEGMENT.calc_virtualWidth() / maxWidth, SEGMENT.calc_virtualHeight() / maxHeight); // WLEDMM re-calc width/heiht from active settings
#endif
dataSize += sizeof(jVectorMap);
USER_PRINT("dataSize ");
USER_PRINT(dataSize);
@@ -1057,7 +1066,7 @@ void IRAM_ATTR_YN Segment::setPixelColor(int i, uint32_t col) //WLEDMM: IRAM_ATT
// set pixel
if (x != lastX || y != lastY) { // only paint if pixel position is different
if (simpleSegment) setPixelColorXY_fast(x, y, col, scaled_col, vW, vH);
else setPixelColorXY(x, y, col);
else setPixelColorXY_slow(x, y, col);
}
lastX = x;
lastY = y;
@@ -1353,7 +1362,7 @@ void Segment::fill(uint32_t c) {
// fill 2D segment
for(int y = 0; y < rows; y++) for (int x = 0; x < cols; x++) {
if (simpleSegment) setPixelColorXY_fast(x, y, c, scaled_col, cols, rows);
else setPixelColorXY(x, y, c);
else setPixelColorXY_slow(x, y, c);
}
} else { // fill 1D strip
for (int x = 0; x < cols; x++) setPixelColor(x, c);
@@ -1408,7 +1417,7 @@ void Segment::fade_out(uint8_t rate) {
int g2 = G(color2);
int b2 = B(color2);
for (uint_fast16_t y = 0; y < rows; y++) for (uint_fast16_t x = 0; x < cols; x++) {
for (int y = 0; y < rows; y++) for (int x = 0; x < cols; x++) {
uint32_t color = is2D() ? getPixelColorXY(x, y) : getPixelColor(x);
if (color == color2) continue; // WLEDMM speedup - pixel color = target color, so nothing to do
int w1 = W(color);
@@ -1426,10 +1435,12 @@ void Segment::fade_out(uint8_t rate) {
rdelta += (r2 == r1) ? 0 : (r2 > r1) ? 1 : -1;
gdelta += (g2 == g1) ? 0 : (g2 > g1) ? 1 : -1;
bdelta += (b2 == b1) ? 0 : (b2 > b1) ? 1 : -1;
uint32_t colorNew = RGBW32(r1 + rdelta, g1 + gdelta, b1 + bdelta, w1 + wdelta); // WLEDMM
//if ((wdelta == 0) && (rdelta == 0) && (gdelta == 0) && (bdelta == 0)) continue; // WLEDMM delta = zero => no change // causes problem with text overlay
if (is2D()) setPixelColorXY((uint16_t)x, (uint16_t)y, r1 + rdelta, g1 + gdelta, b1 + bdelta, w1 + wdelta);
else setPixelColor((uint16_t)x, r1 + rdelta, g1 + gdelta, b1 + bdelta, w1 + wdelta);
if (colorNew != color) { // WLEDMM speedup - do not repaint the same color
if (is2D()) setPixelColorXY(x, y, colorNew);
else setPixelColor(x, colorNew);
}
}
}
@@ -1836,6 +1847,7 @@ void WS2812FX::service() {
if (!cctFromRgb || correctWB) busses.setSegmentCCT(seg.currentBri(seg.cct, true), correctWB);
for (uint8_t c = 0; c < NUM_COLORS; c++) _colors_t[c] = gamma32(_colors_t[c]);
seg.startFrame(); // WLEDMM
// effect blending (execute previous effect)
// actual code may be a bit more involved as effects have runtime data including allocated memory
//if (seg.transitional && seg._modeP) (*_mode[seg._modeP])(progress());

View File

@@ -8,7 +8,7 @@
*/
// version code in format yymmddb (b = daily build)
#define VERSION 2408050
#define VERSION 2408051
// WLEDMM - you can check for this define in usermods, to only enabled WLEDMM specific code in the "right" fork. Its not defined in AC WLED.
#define _MoonModules_WLED_