soap effect speedup (MM specific)

* postpone LEDs pixel drawing to the end of effect -> up to 30% faster * some minor optimizations: reduce heap fragmentation, use "fast" types
2025-12-17 16:55:09 +01:00
parent 4ec939cb4c
commit 49e6de33c7
1 changed files with 27 additions and 12 deletions
--- a/wled00/FX.cpp
+++ b/wled00/FX.cpp
@@ -8738,15 +8738,17 @@ static const char _data_FX_MODE_2DDISTORTIONWAVES[] PROGMEM = "Distortion Waves@
 //Idea from https://www.youtube.com/watch?v=DiHBgITrZck&ab_channel=StefanPetrick
 // adapted for WLED by @blazoncek, optimization by @dedehai
 static void soapPixels(bool isRow, uint8_t *noise3d, CRGB *pixels) {
-  const uint16_t cols = SEGMENT.virtualWidth();
-  const uint16_t rows = SEGMENT.virtualHeight();
+  const uint_fast16_t cols = SEGMENT.virtualWidth();
+  const uint_fast16_t rows = SEGMENT.virtualHeight();
  const auto myXY   = [&](int x, int y) { return x + y * cols; };
+  //const auto abs  = [](int x) { return x<0 ? -x : x; }; // WLEDMM not any faster than abs()
  const int  tRC  = isRow ? rows : cols; // transpose if isRow
  const int  tCR  = isRow ? cols : rows; // transpose if isRow
  const int  amplitude = max(1, (tCR - 8) >> 3) * (1 + (SEGMENT.custom1 >> 5));
  const int  shift = 0; //(128 - SEGMENT.custom2)*2;

-  CRGB ledsbuff[tCR];
+  const size_t maxtCR = max(cols, rows);  // WLEDMM always allocate max(cols,rows) buffer, to reduce heap fragmentation
+  CRGB ledsbuff[/*tCR*/ maxtCR];          // ledsbuff is a VLA in heap (not stack), allocated / removed for each function run - size = max(cols,rows) * sizeof(CRGB)

  for (int i = 0; i < tRC; i++) {
    int amount   = ((int)noise3d[isRow ? i*cols : i] - 128) * amplitude + shift; // use first row/column: XY(0,i)/XY(i,0)
@@ -8779,20 +8781,27 @@ static void soapPixels(bool isRow, uint8_t *noise3d, CRGB *pixels) {
      else                         PixelB = ColorFromPalette(SEGPALETTE, ~noise3d[indxB]*3);
      ledsbuff[j] = (PixelA.nscale8(ease8InOutApprox(255 - fraction))) + (PixelB.nscale8(ease8InOutApprox(fraction)));
    }
-    for (int j = 0; j < tCR; j++) {
-      CRGB c = ledsbuff[j];
-      if (isRow) std::swap(j,i);
-      SEGMENT.setPixelColorXY(i, j, pixels[myXY(i,j)] = c);
-      if (isRow) std::swap(j,i);
-    }
+    // upstream code
+    //for (int j = 0; j < tCR; j++) {
+    //  CRGB c = ledsbuff[j];
+    //  if (isRow) std::swap(j,i);
+    //  SEGMENT.setPixelColorXY(i, j, pixels[myXY(i,j)] = c);
+    //  if (isRow) std::swap(j,i);
+    //}
+
+    // WLEDMM optimization: only copy back pixels, dump to LEDs later -> up to 30% faster
+    if (isRow)
+      for (int j = 0; j < tCR; j++) pixels[myXY(j,i)] = ledsbuff[j]; //swap x and y
+    else
+      for (int j = 0; j < tCR; j++) pixels[myXY(i,j)] = ledsbuff[j];
  }
 }

 uint16_t mode_2Dsoap() {
  if (!strip.isMatrix) return mode_oops(); // not a 2D set-up

-  const uint16_t cols = SEGMENT.virtualWidth();
-  const uint16_t rows = SEGMENT.virtualHeight();
+  const uint_fast16_t cols = SEGMENT.virtualWidth();
+  const uint_fast16_t rows = SEGMENT.virtualHeight();
  const auto myXY = [&](int x, int y) { return x + y * cols; };

  const size_t segSize = SEGMENT.width() * SEGMENT.height(); // prevent reallocation if mirrored or grouped
@@ -8810,7 +8819,7 @@ uint16_t mode_2Dsoap() {
  // init
  if (SEGENV.call == 0) {
    random16_set_seed(535); //WLEDMM SuperSync
-    SEGENV.setUpLeds();
+    //SEGENV.setUpLeds();   // leds buffer not needed, effect has its own pixel buffer
    SEGMENT.fill(BLACK);
  }

@@ -8839,6 +8848,12 @@ uint16_t mode_2Dsoap() {
  soapPixels(true,  noise3d, pixels); // rows
  soapPixels(false, noise3d, pixels); // cols

+  // WLEDMM optimization: dumping out all pixels at once is ~20% faster
+  for (int i = 0; i < cols; i++) {
+    for (int j = 0; j < rows; j++) {
+      SEGMENT.setPixelColorXY(i, j, uint32_t(pixels[myXY(i,j)]) & 0x00FFFFFF);
+  } }
+
  return FRAMETIME;
 }
 static const char _data_FX_MODE_2DSOAP[] PROGMEM = "Soap@!,Smoothness,Density;;!;2;pal=11";