diff --git a/wled00/FX.cpp b/wled00/FX.cpp index 36eb5a69..a4a6822c 100644 --- a/wled00/FX.cpp +++ b/wled00/FX.cpp @@ -8738,15 +8738,17 @@ static const char _data_FX_MODE_2DDISTORTIONWAVES[] PROGMEM = "Distortion Waves@ //Idea from https://www.youtube.com/watch?v=DiHBgITrZck&ab_channel=StefanPetrick // adapted for WLED by @blazoncek, optimization by @dedehai static void soapPixels(bool isRow, uint8_t *noise3d, CRGB *pixels) { - const uint16_t cols = SEGMENT.virtualWidth(); - const uint16_t rows = SEGMENT.virtualHeight(); + const uint_fast16_t cols = SEGMENT.virtualWidth(); + const uint_fast16_t rows = SEGMENT.virtualHeight(); const auto myXY = [&](int x, int y) { return x + y * cols; }; + //const auto abs = [](int x) { return x<0 ? -x : x; }; // WLEDMM not any faster than abs() const int tRC = isRow ? rows : cols; // transpose if isRow const int tCR = isRow ? cols : rows; // transpose if isRow const int amplitude = max(1, (tCR - 8) >> 3) * (1 + (SEGMENT.custom1 >> 5)); const int shift = 0; //(128 - SEGMENT.custom2)*2; - CRGB ledsbuff[tCR]; + const size_t maxtCR = max(cols, rows); // WLEDMM always allocate max(cols,rows) buffer, to reduce heap fragmentation + CRGB ledsbuff[/*tCR*/ maxtCR]; // ledsbuff is a VLA in heap (not stack), allocated / removed for each function run - size = max(cols,rows) * sizeof(CRGB) for (int i = 0; i < tRC; i++) { int amount = ((int)noise3d[isRow ? i*cols : i] - 128) * amplitude + shift; // use first row/column: XY(0,i)/XY(i,0) @@ -8779,20 +8781,27 @@ static void soapPixels(bool isRow, uint8_t *noise3d, CRGB *pixels) { else PixelB = ColorFromPalette(SEGPALETTE, ~noise3d[indxB]*3); ledsbuff[j] = (PixelA.nscale8(ease8InOutApprox(255 - fraction))) + (PixelB.nscale8(ease8InOutApprox(fraction))); } - for (int j = 0; j < tCR; j++) { - CRGB c = ledsbuff[j]; - if (isRow) std::swap(j,i); - SEGMENT.setPixelColorXY(i, j, pixels[myXY(i,j)] = c); - if (isRow) std::swap(j,i); - } + // upstream code + //for (int j = 0; j < tCR; j++) { + // CRGB c = ledsbuff[j]; + // if (isRow) std::swap(j,i); + // SEGMENT.setPixelColorXY(i, j, pixels[myXY(i,j)] = c); + // if (isRow) std::swap(j,i); + //} + + // WLEDMM optimization: only copy back pixels, dump to LEDs later -> up to 30% faster + if (isRow) + for (int j = 0; j < tCR; j++) pixels[myXY(j,i)] = ledsbuff[j]; //swap x and y + else + for (int j = 0; j < tCR; j++) pixels[myXY(i,j)] = ledsbuff[j]; } } uint16_t mode_2Dsoap() { if (!strip.isMatrix) return mode_oops(); // not a 2D set-up - const uint16_t cols = SEGMENT.virtualWidth(); - const uint16_t rows = SEGMENT.virtualHeight(); + const uint_fast16_t cols = SEGMENT.virtualWidth(); + const uint_fast16_t rows = SEGMENT.virtualHeight(); const auto myXY = [&](int x, int y) { return x + y * cols; }; const size_t segSize = SEGMENT.width() * SEGMENT.height(); // prevent reallocation if mirrored or grouped @@ -8810,7 +8819,7 @@ uint16_t mode_2Dsoap() { // init if (SEGENV.call == 0) { random16_set_seed(535); //WLEDMM SuperSync - SEGENV.setUpLeds(); + //SEGENV.setUpLeds(); // leds buffer not needed, effect has its own pixel buffer SEGMENT.fill(BLACK); } @@ -8839,6 +8848,12 @@ uint16_t mode_2Dsoap() { soapPixels(true, noise3d, pixels); // rows soapPixels(false, noise3d, pixels); // cols + // WLEDMM optimization: dumping out all pixels at once is ~20% faster + for (int i = 0; i < cols; i++) { + for (int j = 0; j < rows; j++) { + SEGMENT.setPixelColorXY(i, j, uint32_t(pixels[myXY(i,j)]) & 0x00FFFFFF); + } } + return FRAMETIME; } static const char _data_FX_MODE_2DSOAP[] PROGMEM = "Soap@!,Smoothness,Density;;!;2;pal=11";