(experimental) drawLine speedup by setPixelColorXY_fast

* adds a _fast_ variant of setPixelColorXY that does not perform any error checking.
* drawLine uses the fast setPixelColorXY variant (Bresenham algorithm only).
* for TESTING: the segment option "reverse" switches back to the "original slow" code.
This surely needs some more optimization and improvements.

First test on GEQ 3D shows 10%-30% speedup
This commit is contained in:
Frank
2024-07-14 14:11:11 +02:00
parent 75f0c7cb98
commit dddd1574ec
2 changed files with 88 additions and 1 deletions

View File

@@ -666,6 +666,9 @@ typedef struct Segment {
if (height == 0) return (x%width); // softhack007 avoid div/0
return (x%width) + (y%height) * width;
}
//void setPixelColorXY_fast(int x, int y,uint32_t c); // set relative pixel within segment with color - wrapper for _fast
void setPixelColorXY_fast(int x, int y,uint32_t c, uint32_t scaled_col, int cols, int rows); // set relative pixel within segment with color - faster, but no error checking!!!
void setPixelColorXY(int x, int y, uint32_t c); // set relative pixel within segment with color
inline void setPixelColorXY(unsigned x, unsigned y, uint32_t c) { setPixelColorXY(int(x), int(y), c); }
inline void setPixelColorXY(int x, int y, byte r, byte g, byte b, byte w = 0) { setPixelColorXY(x, y, RGBW32(r,g,b,w)); }
@@ -981,6 +984,7 @@ class WS2812FX { // 96 bytes
void
setUpMatrix(),
setPixelColorXY_fast(int x, int y, uint32_t c),
setPixelColorXY(int x, int y, uint32_t c);
// outsmart the compiler :) by correctly overloading

View File

@@ -176,6 +176,15 @@ void WS2812FX::setUpMatrix() {
#endif
}
// Absolute (strip-level) matrix variant of setPixelColor() - reduced checking.
//  x, y : absolute matrix coordinates; they are NOT range-checked individually
//  col  : color value passed on to the bus layer unchanged
// The coordinates are folded into a linear index using Segment::maxWidth as the row stride.
// If that index lies inside the custom mapping table, it is translated through the table.
// The only safeguard is the final comparison against _length: out-of-range indices are dropped.
void IRAM_ATTR WS2812FX::setPixelColorXY_fast(int x, int y, uint32_t col) // WLEDMM: uses plain IRAM_ATTR
{
  uint_fast16_t ledIdx = y * Segment::maxWidth + x;        // row-major position on the full matrix
  const bool hasMapping = (ledIdx < customMappingSize);    // is there a ledmap entry for this position?
  if (hasMapping) ledIdx = customMappingTable[ledIdx];     // translate logical -> physical index
  if (ledIdx < _length) busses.setPixelColor(ledIdx, col); // silently ignore anything beyond the strip
}
// absolute matrix version of setPixelColor()
void IRAM_ATTR_YN WS2812FX::setPixelColorXY(int x, int y, uint32_t col) //WLEDMM: IRAM_ATTR conditionally
{
@@ -211,6 +220,67 @@ uint32_t WS2812FX::getPixelColorXY(uint16_t x, uint16_t y) {
// XY(x,y) - gets pixel index within current segment (often used to reference leds[] array element)
// WLEDMM Segment::XY() is declared inline, see FX.h
// Simplified version of Segment::setPixelColorXY - without error checking. Does not support grouping or spacing.
//  x, y       : pixel position relative to the segment; NOT validated here
//  col        : unscaled color; only written to the ledsrgb[] buffer (when that buffer exists)
//  scaled_col : color that is actually sent to the strip. Segment brightness is NOT applied in this
//               function, so the caller must pass the already brightness-scaled color.
//  cols, rows : segment virtualWidth() and virtualHeight(), passed in so they are not re-computed per pixel
// Handles reverse, reverse_y, transpose, mirror and mirror_y.
// There is no "is this a matrix?" test and no bounds check on x / y; the caller is responsible for both.
void IRAM_ATTR Segment::setPixelColorXY_fast(int x, int y, uint32_t col, uint32_t scaled_col, int cols, int rows) //WLEDMM
{
  if (ledsrgb) { // WLEDMM small optimization
    // direct index instead of XY(): avoids the checks done there - be optimistic about ranges of x and y
    const unsigned i = x + y*cols;
    const CRGB fastled_col = CRGB(col);
    if (!(ledsrgb[i] == fastled_col)) ledsrgb[i] = fastled_col; // only write when the stored color differs
  }
  // The "color unchanged -> skip the strip update" shortcut is deliberately not used here;
  // it is still considered a dangerous optimization.

  // handle reverse and transpose
  if (reverse  ) x = cols - x - 1;
  if (reverse_y) y = rows - y - 1;
  if (transpose) std::swap(x,y); // swap X & Y if segment transposed

  // set the requested pixel (segment-relative -> absolute matrix coordinates)
  strip.setPixelColorXY_fast(start + x, startY + y, scaled_col);

  // WLEDMM shortcut when no mirroring needed - nothing below would run in that case
  if (!mirror && !mirror_y) return;

  // handle mirroring
  const int_fast16_t wid_ = stop - start;    // physical segment width
  const int_fast16_t hei_ = stopY - startY;  // physical segment height
  if (mirror) { //set the corresponding horizontally mirrored pixel
    if (transpose) strip.setPixelColorXY_fast(start + x, startY + hei_ - y - 1, scaled_col);
    else           strip.setPixelColorXY_fast(start + wid_ - x - 1, startY + y, scaled_col);
  }
  if (mirror_y) { //set the corresponding vertically mirrored pixel
    if (transpose) strip.setPixelColorXY_fast(start + wid_ - x - 1, startY + y, scaled_col);
    else           strip.setPixelColorXY_fast(start + x, startY + hei_ - y - 1, scaled_col);
  }
  if (mirror_y && mirror) { //set the corresponding vertically AND horizontally mirrored pixel
    // must include the segment offset like all other writes above, otherwise the pixel
    // lands in the wrong place for any segment that does not begin at (0,0)
    strip.setPixelColorXY_fast(start + wid_ - x - 1, startY + hei_ - y - 1, scaled_col);
  }
}
// normal Segment::setPixelColorXY with error checking, and support for grouping / spacing
void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col) //WLEDMM: IRAM_ATTR conditionally
{
if (Segment::maxHeight==1) return; // not a matrix set-up
@@ -608,6 +678,7 @@ void Segment::nscale8(uint8_t scale) { //WLEDMM: use fast types
//line function
void Segment::drawLine(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint32_t c, bool soft, uint16_t distance) {
if (!isActive()) return; // not active
// if (Segment::maxHeight==1) return; // not a matrix set-up
const int cols = virtualWidth();
const int rows = virtualHeight();
if (x0 >= cols || x1 >= cols || y0 >= rows || y1 >= rows) return;
@@ -621,6 +692,16 @@ void Segment::drawLine(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint3
return;
}
// WLEDMM shortcut when no grouping/spacing used
bool simpleSegment = !reverse && (grouping == 1) && (spacing == 0); // !reverse is just for back-to-back testing against "slow" functions
uint32_t scaled_col = c;
if (simpleSegment) {
// segment brightness must be pre-calculated for the "fast" setPixelColorXY variant!
uint8_t _bri_t = currentBri(on ? opacity : 0);
if (!_bri_t && !transitional) return;
if (_bri_t < 255) scaled_col = color_fade(c, _bri_t);
}
if (soft) {
// Xiaolin Wu's algorithm
const bool steep = dy > dx;
@@ -651,7 +732,9 @@ void Segment::drawLine(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint3
// Bresenham's algorithm
int err = (dx>dy ? dx : -dy)/2; // error direction
for (uint_fast16_t d=0; d<distance; d++) {
setPixelColorXY(x0, y0, c);
// if (x0 >= cols || y0 >= rows) break; // WLEDMM we hit the edge - should never happen
if (simpleSegment) setPixelColorXY_fast(x0, y0, c, scaled_col, cols, rows);
else setPixelColorXY(x0, y0, c);
if (x0==x1 && y0==y1) break;
int e2 = err;
if (e2 >-dx) { err -= dy; x0 += sx; }