From 40c96c14f50318d3d9073e90e7f09a6a5d5a2c64 Mon Sep 17 00:00:00 2001
From: Frank <frank.moehle@outlook.de>
Date: Sun, 30 Apr 2023 18:41:30 +0200
Subject: [PATCH] a bunch of smaller speedups to core functions

* make local functions "static"
* use fast int types (uint_fast8_t, uint_fast16_t) where possible
* use native min/max instead of MIN/MAX macros. Macros evaluate each parameter TWICE!!
* add __attribute__((pure)) and __attribute__((const)) to help the compiler optimize
* ws.cpp: reduce max "live leds" in fastpath mode
---
 wled00/FX.cpp          | 10 +++---
 wled00/FX_2Dfcn.cpp    | 64 +++++++++++++++++-----------------
 wled00/FX_fcn.cpp      | 79 +++++++++++++++++++++---------------------
 wled00/bus_manager.cpp | 14 ++++----
 wled00/bus_manager.h   |  2 +-
 wled00/colors.cpp      | 12 +++----
 wled00/fcn_declare.h   | 14 ++++----
 wled00/led.cpp         |  2 +-
 wled00/wled.h          |  2 +-
 wled00/ws.cpp          | 14 +++++---
 10 files changed, 109 insertions(+), 104 deletions(-)

diff --git a/wled00/FX.cpp b/wled00/FX.cpp
index acde24a9..d36bac0c 100644
--- a/wled00/FX.cpp
+++ b/wled00/FX.cpp
@@ -2243,7 +2243,7 @@ uint16_t mode_colortwinkle() {
       }
     }
   }
-  return FRAMETIME_FIXED;
+  return FRAMETIME_FIXED_SLOW;
 }
 static const char _data_FX_MODE_COLORTWINKLE[] PROGMEM = "Colortwinkles@Fade speed,Spawn speed;;!;;m12=0"; //pixels
 
@@ -3096,7 +3096,7 @@ uint16_t candle(bool multi)
     }
   }
 
-  return FRAMETIME_FIXED;
+  return FRAMETIME_FIXED_SLOW;
 }
 
 
@@ -6832,7 +6832,7 @@ uint16_t mode_blurz(void) {                    // Blurz. By Andrew Tuline.
     SEGMENT.setPixelColor(segLoc, color_blend(SEGCOLOR(1), SEGMENT.color_from_palette((uint16_t)pixColor, false, PALETTE_SOLID_WRAP, 0),(uint8_t)pixIntensity)); // repaint center pixel after blur
   } else SEGMENT.blur(max(SEGMENT.intensity, (uint8_t)1));  // silence - just blur it again
 
-  return FRAMETIME;
+  return FRAMETIME_FIXED;
 } // mode_blurz()
 static const char _data_FX_MODE_BLURZ[] PROGMEM = "Blurz ☾@Fade rate,Blur;!,Color mix;!;1f;sx=48,ix=127,m12=0,si=0"; // Pixels, Beatsin
 #endif
@@ -6944,7 +6944,7 @@ uint16_t mode_freqmap(void) {                   // Map FFT_MajorPeak to SEGLEN.
     SEGMENT.setPixelColor(locn, color_blend(SEGCOLOR(1), SEGMENT.color_from_palette(SEGMENT.intensity+pixCol, false, PALETTE_SOLID_WRAP, 0), bright));
   }
 
-  return FRAMETIME;
+  return FRAMETIME_FIXED;
 } // mode_freqmap()
 static const char _data_FX_MODE_FREQMAP[] PROGMEM = "Freqmap@Fade rate,Starting color;!,!;!;1f;m12=0,si=0"; // Pixels, Beatsin
 
@@ -7069,7 +7069,7 @@ uint16_t mode_freqwave(void) {                  // Freqwave. By Andreas Pleschun
   }
 
   uint8_t secondHand = micros()/(256-SEGMENT.speed)/500 % 16;
-  if((SEGMENT.speed > 254) || (SEGENV.aux0 != secondHand)) {   // WLEDMM allow run run at full speed
+  if((SEGMENT.speed > 254) || (SEGENV.aux0 != secondHand)) {   // WLEDMM allow to run at full speed
     SEGENV.aux0 = secondHand;
 
     float sensitivity = 0.5f * mapf(SEGMENT.custom3, 1, 31, 0.5, 10); // reduced resolution slider
diff --git a/wled00/FX_2Dfcn.cpp b/wled00/FX_2Dfcn.cpp
index 45eb6ac1..d862632d 100644
--- a/wled00/FX_2Dfcn.cpp
+++ b/wled00/FX_2Dfcn.cpp
@@ -160,7 +160,7 @@ void IRAM_ATTR_YN WS2812FX::setPixelColorXY(int x, int y, uint32_t col) //WLEDMM
 {
 #ifndef WLED_DISABLE_2D
   if (!isMatrix) return; // not a matrix set-up
-  uint16_t index = y * Segment::maxWidth + x;
+  uint_fast16_t index = y * Segment::maxWidth + x;
 #else
   uint16_t index = x;
 #endif
@@ -172,7 +172,7 @@ void IRAM_ATTR_YN WS2812FX::setPixelColorXY(int x, int y, uint32_t col) //WLEDMM
 // returns RGBW values of pixel
 uint32_t WS2812FX::getPixelColorXY(uint16_t x, uint16_t y) {
 #ifndef WLED_DISABLE_2D
-  uint16_t index = (y * Segment::maxWidth + x);
+  uint_fast16_t index = (y * Segment::maxWidth + x); //WLEDMM: use fast types
 #else
   uint16_t index = x;
 #endif
@@ -188,9 +188,9 @@ uint32_t WS2812FX::getPixelColorXY(uint16_t x, uint16_t y) {
 #ifndef WLED_DISABLE_2D
 
 // XY(x,y) - gets pixel index within current segment (often used to reference leds[] array element)
-uint16_t IRAM_ATTR_YN Segment::XY(uint16_t x, uint16_t y) { //WLEDMM: IRAM_ATTR conditionaly
-  uint16_t width  = virtualWidth();   // segment width in logical pixels
-  uint16_t height = virtualHeight();  // segment height in logical pixels
+uint16_t IRAM_ATTR_YN Segment::XY(uint16_t x, uint16_t y) { //WLEDMM: IRAM_ATTR conditionally, use fast types
+  uint_fast16_t width  = virtualWidth();   // segment width in logical pixels
+  uint_fast16_t height = virtualHeight();  // segment height in logical pixels
   return (x%width) + (y%height) * width;
 }
 
@@ -221,7 +221,7 @@ void IRAM_ATTR_YN Segment::setPixelColorXY(int x, int y, uint32_t col) //WLEDMM:
 
   for (int j = 0; j < grouping; j++) {   // groupping vertically
     for (int g = 0; g < grouping; g++) { // groupping horizontally
-      uint16_t xX = (x+g), yY = (y+j);
+      uint_fast16_t xX = (x+g), yY = (y+j);    //WLEDMM: use fast types
       if (xX >= width() || yY >= height()) continue; // we have reached one dimension's end
 
       strip.setPixelColorXY(start + xX, startY + yY, col);
@@ -323,7 +323,7 @@ void Segment::blurRow(uint16_t row, fract8 blur_amount) {
   uint8_t keep = 255 - blur_amount;
   uint8_t seep = blur_amount >> 1;
   CRGB carryover = CRGB::Black;
-  for (uint16_t x = 0; x < cols; x++) {
+  for (uint_fast16_t x = 0; x < cols; x++) { //WLEDMM: use fast types
     CRGB cur = getPixelColorXY(x, row);
     CRGB part = cur;
     part.nscale8(seep);
@@ -348,7 +348,7 @@ void Segment::blurCol(uint16_t col, fract8 blur_amount) {
   uint8_t keep = 255 - blur_amount;
   uint8_t seep = blur_amount >> 1;
   CRGB carryover = CRGB::Black;
-  for (uint16_t i = 0; i < rows; i++) {
+  for (uint_fast16_t i = 0; i < rows; i++) { //WLEDMM: use fast types
     CRGB cur = getPixelColorXY(col, i);
     CRGB part = cur;
     part.nscale8(seep);
@@ -364,23 +364,23 @@ void Segment::blurCol(uint16_t col, fract8 blur_amount) {
 }
 
 // 1D Box blur (with added weight - blur_amount: [0=no blur, 255=max blur])
-void Segment::box_blur(uint16_t i, bool vertical, fract8 blur_amount) {
-  const uint16_t cols = virtualWidth();
-  const uint16_t rows = virtualHeight();
-  const uint16_t dim1 = vertical ? rows : cols;
-  const uint16_t dim2 = vertical ? cols : rows;
+void Segment::box_blur(uint16_t i, bool vertical, fract8 blur_amount) {  //WLEDMM: use fast types
+  const uint_fast16_t cols = virtualWidth();
+  const uint_fast16_t rows = virtualHeight();
+  const uint_fast16_t dim1 = vertical ? rows : cols;
+  const uint_fast16_t dim2 = vertical ? cols : rows;
   if (i >= dim2) return;
   const float seep = blur_amount/255.f;
   const float keep = 3.f - 2.f*seep;
   // 1D box blur
   CRGB tmp[dim1];
-  for (uint16_t j = 0; j < dim1; j++) {
-    uint16_t x = vertical ? i : j;
-    uint16_t y = vertical ? j : i;
-    uint16_t xp = vertical ? x : x-1;
-    uint16_t yp = vertical ? y-1 : y;
-    uint16_t xn = vertical ? x : x+1;
-    uint16_t yn = vertical ? y+1 : y;
+  for (uint_fast16_t j = 0; j < dim1; j++) {
+    uint_fast16_t x = vertical ? i : j;
+    uint_fast16_t y = vertical ? j : i;
+    uint_fast16_t xp = vertical ? x : x-1;
+    uint_fast16_t yp = vertical ? y-1 : y;
+    uint_fast16_t xn = vertical ? x : x+1;
+    uint_fast16_t yn = vertical ? y+1 : y;
     CRGB curr = getPixelColorXY(x,y);
     CRGB prev = (xp<0 || yp<0) ? CRGB::Black : getPixelColorXY(xp,yp);
     CRGB next = ((vertical && yn>=dim1) || (!vertical && xn>=dim1)) ? CRGB::Black : getPixelColorXY(xn,yn);
@@ -390,10 +390,10 @@ void Segment::box_blur(uint16_t i, bool vertical, fract8 blur_amount) {
     b = (curr.b*keep + (prev.b + next.b)*seep) / 3;
     tmp[j] = CRGB(r,g,b);
   }
-  for (uint16_t j = 0; j < dim1; j++) {
-    uint16_t x = vertical ? i : j;
-    uint16_t y = vertical ? j : i;
-    setPixelColorXY(x, y, tmp[j]);
+  for (uint_fast16_t j = 0; j < dim1; j++) {
+    uint_fast16_t x = vertical ? i : j;
+    uint_fast16_t y = vertical ? j : i;
+    setPixelColorXY((int)x, (int)y, tmp[j]);
   }
 }
 
@@ -411,9 +411,9 @@ void Segment::box_blur(uint16_t i, bool vertical, fract8 blur_amount) {
 //         eventually all the way to black; this is by design so that
 //         it can be used to (slowly) clear the LEDs to black.
 
-void Segment::blur1d(fract8 blur_amount) {
-  const uint16_t rows = virtualHeight();
-  for (uint16_t y = 0; y < rows; y++) blurRow(y, blur_amount);
+void Segment::blur1d(fract8 blur_amount) {   //WLEDMM: use fast types
+  const uint_fast16_t rows = virtualHeight();
+  for (uint_fast16_t y = 0; y < rows; y++) blurRow(y, blur_amount);
 }
 
 void Segment::moveX(int8_t delta) {
@@ -504,11 +504,11 @@ void Segment::fill_circle(uint16_t cx, uint16_t cy, uint8_t radius, CRGB col) {
   }
 }
 
-void Segment::nscale8(uint8_t scale) {
-  const uint16_t cols = virtualWidth();
-  const uint16_t rows = virtualHeight();
-  for(uint16_t y = 0; y < rows; y++) for (uint16_t x = 0; x < cols; x++) {
-    setPixelColorXY(x, y, CRGB(getPixelColorXY(x, y)).nscale8(scale));
+void Segment::nscale8(uint8_t scale) {  //WLEDMM: use fast types
+  const uint_fast16_t cols = virtualWidth();
+  const uint_fast16_t rows = virtualHeight();
+  for(uint_fast16_t y = 0; y < rows; y++) for (uint_fast16_t x = 0; x < cols; x++) {
+    setPixelColorXY((int)x, (int)y, CRGB(getPixelColorXY(x, y)).nscale8(scale));
   }
 }
 
diff --git a/wled00/FX_fcn.cpp b/wled00/FX_fcn.cpp
index bc88546d..1fb514ea 100644
--- a/wled00/FX_fcn.cpp
+++ b/wled00/FX_fcn.cpp
@@ -220,7 +220,7 @@ CRGBPalette16 &Segment::loadPalette(CRGBPalette16 &targetPalette, uint8_t pal) {
   static unsigned long _lastPaletteChange = 0; // perhaps it should be per segment
   static CRGBPalette16 randomPalette = CRGBPalette16(DEFAULT_COLOR);
   static CRGBPalette16 prevRandomPalette = CRGBPalette16(CRGB(BLACK));
-  byte tcp[72];
+  byte tcp[76] = { 255 };   //WLEDMM: prevent out-of-range access in loadDynamicGradientPalette()
   if (pal < 245 && pal > GRADIENT_PALETTE_COUNT+13) pal = 0;
   if (pal > 245 && (strip.customPalettes.size() == 0 || 255U-pal > strip.customPalettes.size()-1)) pal = 0;
   //default palette. Differs depending on effect
@@ -426,13 +426,13 @@ void Segment::set(uint16_t i1, uint16_t i2, uint8_t grp, uint8_t spc, uint16_t o
     return;
   }
   if (i1 < Segment::maxWidth || (i1 >= Segment::maxWidth*Segment::maxHeight && i1 < strip.getLengthTotal())) start = i1; // Segment::maxWidth equals strip.getLengthTotal() for 1D
-  stop = i2 > Segment::maxWidth*Segment::maxHeight ? MIN(i2,strip.getLengthTotal()) : (i2 > Segment::maxWidth ? Segment::maxWidth : MAX(1,i2));
+  stop = i2 > Segment::maxWidth*Segment::maxHeight ? min(i2,strip.getLengthTotal()) : (i2 > Segment::maxWidth ? Segment::maxWidth : max((uint16_t)1,i2));  // WLEDMM: use native min/max
   startY = 0;
   stopY  = 1;
   #ifndef WLED_DISABLE_2D
   if (Segment::maxHeight>1) { // 2D
     if (i1Y < Segment::maxHeight) startY = i1Y;
-    stopY = i2Y > Segment::maxHeight ? Segment::maxHeight : MAX(1,i2Y);
+    stopY = i2Y > Segment::maxHeight ? Segment::maxHeight : max((uint16_t)1,i2Y);         // WLEDMM: use native min/max
   }
   #endif
   if (grp) {
@@ -536,16 +536,16 @@ void Segment::setPalette(uint8_t pal) {
 }
 
 // 2D matrix
-uint16_t Segment::virtualWidth() const {
-  uint16_t groupLen = groupLength();
-  uint16_t vWidth = ((transpose ? height() : width()) + groupLen - 1) / groupLen;
+uint16_t Segment::virtualWidth() const {  // WLEDMM use fast types
+  uint_fast16_t groupLen = groupLength();
+  uint_fast16_t vWidth = ((transpose ? height() : width()) + groupLen - 1) / groupLen;
   if (mirror) vWidth = (vWidth + 1) /2;  // divide by 2 if mirror, leave at least a single LED
   return vWidth;
 }
 
-uint16_t Segment::virtualHeight() const {
-  uint16_t groupLen = groupLength();
-  uint16_t vHeight = ((transpose ? width() : height()) + groupLen - 1) / groupLen;
+uint16_t Segment::virtualHeight() const {  // WLEDMM use fast types
+  uint_fast16_t groupLen = groupLength();
+  uint_fast16_t vHeight = ((transpose ? width() : height()) + groupLen - 1) / groupLen;
   if (mirror_y) vHeight = (vHeight + 1) /2;  // divide by 2 if mirror, leave at least a single LED
   return vHeight;
 }
@@ -641,8 +641,8 @@ class JMapC {
         File jMapFile;
         jMapFile = WLED_FS.open(jMapFileName, "r");
 
-        uint8_t maxWidth = 0;
-        uint8_t maxHeight = 0;
+        uint_fast16_t maxWidth = 0;       // WLEDMM fix uint8 overflow for large width/height
+        uint_fast16_t maxHeight = 0;      // WLEDMM
 
         //https://arduinojson.org/v6/how-to/deserialize-a-very-large-document/
         jMapFile.find("[");
@@ -666,8 +666,8 @@ class JMapC {
             if (arrayChunk[0].is<JsonArray>()) { //if array of arrays
               arrayAndSize.array = new XandY[arrayChunk.size()];
               for (JsonVariant arrayElement: arrayChunk) {
-                maxWidth = MAX(maxWidth, arrayElement[0].as<uint8_t>());
-                maxHeight = MAX(maxHeight, arrayElement[1].as<uint8_t>());
+                maxWidth = max((uint16_t)maxWidth, arrayElement[0].as<uint16_t>());       // WLEDMM use native min/max
+                maxHeight = max((uint16_t)maxHeight, arrayElement[1].as<uint16_t>());     // WLEDMM
                 arrayAndSize.array[arrayAndSize.size].x = arrayElement[0].as<uint8_t>();
                 arrayAndSize.array[arrayAndSize.size].y = arrayElement[1].as<uint8_t>();
                 arrayAndSize.size++;
@@ -676,8 +676,8 @@ class JMapC {
             }
             else { // if array (of x and y)
               arrayAndSize.array = new XandY[1];
-              maxWidth = MAX(maxWidth, arrayChunk[0].as<uint8_t>());
-              maxHeight = MAX(maxHeight, arrayChunk[1].as<uint8_t>());
+              maxWidth = max((uint16_t)maxWidth, arrayChunk[0].as<uint16_t>());         // WLEDMM use native min/max
+              maxHeight = max((uint16_t)maxHeight, arrayChunk[1].as<uint16_t>());       // WLEDMM
               arrayAndSize.array[arrayAndSize.size].x = arrayChunk[0].as<uint8_t>();
               arrayAndSize.array[arrayAndSize.size].y = arrayChunk[1].as<uint8_t>();
               arrayAndSize.size++;
@@ -690,8 +690,7 @@ class JMapC {
         } while (jMapFile.findUntil(",", "]"));
 
         maxWidth++; maxHeight++;
-        scale = MIN(SEGMENT.virtualWidth() / maxWidth, SEGMENT.virtualHeight() / maxHeight);
-
+        scale = min(SEGMENT.virtualWidth() / maxWidth, SEGMENT.virtualHeight() / maxHeight);  // WLEDMM use native min/max
         dataSize += sizeof(jVectorMap);
         USER_PRINT("dataSize ");
         USER_PRINT(dataSize);
@@ -1130,8 +1129,8 @@ void Segment::fade_out(uint8_t rate) {
   const uint_fast16_t cols = is2D() ? virtualWidth() : virtualLength();           // WLEDMM use fast int types
   const uint_fast16_t rows = virtualHeight(); // will be 1 for 1D
 
-  rate = (255-rate) >> 1;
-  float mappedRate = float(rate) +1.1;
+  uint_fast8_t fadeRate = (255-rate) >> 1;
+  float mappedRate_r = 1.0f / (float(fadeRate) +1.1f); // WLEDMM use reciprocal  1/mappedRate -> faster on non-FPU chips
 
   uint32_t color = colors[1]; // SEGCOLOR(1); // target color
   int w2 = W(color);
@@ -1146,10 +1145,10 @@ void Segment::fade_out(uint8_t rate) {
     int g1 = G(color);
     int b1 = B(color);
 
-    int wdelta = (w2 - w1) / mappedRate;
-    int rdelta = (r2 - r1) / mappedRate;
-    int gdelta = (g2 - g1) / mappedRate;
-    int bdelta = (b2 - b1) / mappedRate;
+    int wdelta = mappedRate_r * (w2 - w1);  // WLEDMM use reciprocal - it's faster
+    int rdelta = mappedRate_r * (r2 - r1);
+    int gdelta = mappedRate_r * (g2 - g1);
+    int bdelta = mappedRate_r * (b2 - b1);
 
     // if fade isn't complete, make sure delta is at least 1 (fixes rounding issues)
     wdelta += (w2 == w1) ? 0 : (w2 > w1) ? 1 : -1;
@@ -1166,10 +1165,11 @@ void Segment::fade_out(uint8_t rate) {
 void Segment::fadeToBlackBy(uint8_t fadeBy) {
   const uint_fast16_t cols = is2D() ? virtualWidth() : virtualLength();      // WLEDMM use fast int types
   const uint_fast16_t rows = virtualHeight(); // will be 1 for 1D
+  const uint_fast8_t scaledown = 255-fadeBy;  // WLEDMM faster to pre-compute this
 
   for (uint_fast16_t y = 0; y < rows; y++) for (uint_fast16_t x = 0; x < cols; x++) {
-    if (is2D()) setPixelColorXY((uint16_t)x, (uint16_t)y, CRGB(getPixelColorXY(x,y)).nscale8(255-fadeBy));
-    else        setPixelColor((uint16_t)x, CRGB(getPixelColor(x)).nscale8(255-fadeBy));
+    if (is2D()) setPixelColorXY((uint16_t)x, (uint16_t)y, CRGB(getPixelColorXY(x,y)).nscale8(scaledown));
+    else        setPixelColor((uint16_t)x, CRGB(getPixelColor(x)).nscale8(scaledown));
   }
 }
 
@@ -1233,14 +1233,14 @@ uint32_t Segment::color_wheel(uint8_t pos) {
 /*
  * Returns a new, random wheel index with a minimum distance of 42 from pos.
  */
-uint8_t Segment::get_random_wheel_index(uint8_t pos) {
-  uint8_t r = 0, x = 0, y = 0, d = 0;
+uint8_t Segment::get_random_wheel_index(uint8_t pos) { // WLEDMM use fast int types, use native min/max
+  uint_fast8_t r = 0, x = 0, y = 0, d = 0;
 
   while(d < 42) {
     r = random8();
-    x = abs(pos - r);
+    x = abs(int(pos - r));
     y = 255 - x;
-    d = MIN(x, y);
+    d = min(x, y);
   }
   return r;
 }
@@ -1254,7 +1254,7 @@ uint8_t Segment::get_random_wheel_index(uint8_t pos) {
  * @param pbri Value to scale the brightness of the returned color by. Default is 255. (no scaling)
  * @returns Single color from palette
  */
-uint32_t Segment::color_from_palette(uint16_t i, bool mapping, bool wrap, uint8_t mcol, uint8_t pbri)
+uint32_t Segment::color_from_palette(uint_fast16_t i, bool mapping, bool wrap, uint8_t mcol, uint8_t pbri) // WLEDMM use fast int types
 {
   // default palette or no RGB support on segment
   if ((palette == 0 && mcol < NUM_COLORS) || !_isRGB) {
@@ -1265,7 +1265,8 @@ uint32_t Segment::color_from_palette(uint16_t i, bool mapping, bool wrap, uint8_
   }
 
   uint8_t paletteIndex = i;
-  if (mapping && virtualLength() > 1) paletteIndex = (i*255)/(virtualLength() -1);
+  uint_fast16_t vLen = mapping ? virtualLength() : 1;
+  if (mapping && vLen > 1) paletteIndex = (i*255)/(vLen -1);
   if (!wrap) paletteIndex = scale8(paletteIndex, 240); //cut off blend at palette "end"
   CRGB fastled_col;
   CRGBPalette16 curPal;
@@ -1525,7 +1526,7 @@ void IRAM_ATTR WS2812FX::setPixelColor(int i, uint32_t col)
   busses.setPixelColor(i, col);
 }
 
-uint32_t WS2812FX::getPixelColor(uint16_t i)
+uint32_t WS2812FX::getPixelColor(uint_fast16_t i) // WLEDMM fast int types
 {
   if (i < customMappingSize) i = customMappingTable[i];
   if (i >= _length) return 0;
@@ -1584,7 +1585,7 @@ void WS2812FX::estimateCurrentAndLimitBri() {
       byte r = R(c), g = G(c), b = B(c), w = W(c);
 
       if(useWackyWS2815PowerModel) { //ignore white component on WS2815 power calculation
-        busPowerSum += (MAX(MAX(r,g),b)) * 3;
+        busPowerSum += (max(max(r,g),b)) * 3; // WLEDMM use native min/max
       } else {
         busPowerSum += (r + g + b + w);
       }
@@ -1756,14 +1757,14 @@ uint8_t WS2812FX::getActiveSegmentsNum(void) {
   return c;
 }
 
-uint16_t WS2812FX::getLengthTotal(void) {
-  uint16_t len = Segment::maxWidth * Segment::maxHeight; // will be _length for 1D (see finalizeInit()) but should cover whole matrix for 2D
+uint16_t WS2812FX::getLengthTotal(void) {  // WLEDMM fast int types
+  uint_fast16_t len = Segment::maxWidth * Segment::maxHeight; // will be _length for 1D (see finalizeInit()) but should cover whole matrix for 2D
   if (isMatrix && _length > len) len = _length; // for 2D with trailing strip
   return len;
 }
 
-uint16_t WS2812FX::getLengthPhysical(void) {
-  uint16_t len = 0;
+uint16_t WS2812FX::getLengthPhysical(void) {  // WLEDMM fast int types
+  uint_fast16_t len = 0;
   for (unsigned b = 0; b < busses.getNumBusses(); b++) {   //  WLEDMM use native (fast) types
     Bus *bus = busses.getBus(b);
     if (bus->getType() >= TYPE_NET_DDP_RGB) continue; //exclude non-physical network busses
@@ -2024,7 +2025,7 @@ void WS2812FX::loadCustomPalettes() {
         if (!pal.isNull() && pal.size()>4) { // not an empty palette (at least 2 entries)
           if (pal[0].is<int>() && pal[1].is<const char *>()) {
             // we have an array of index & hex strings
-            size_t palSize = MIN(pal.size(), 36);
+            size_t palSize = min(pal.size(), (size_t)36);  // WLEDMM use native min/max
             palSize -= palSize % 2; // make sure size is multiple of 2
             for (size_t i=0, j=0; i<palSize && pal[i].as<int>()<256; i+=2, j+=4) {
               uint8_t rgbw[] = {0,0,0,0};
@@ -2034,7 +2035,7 @@ void WS2812FX::loadCustomPalettes() {
               DEBUG_PRINTF("%d(%d) : %d %d %d\n", i, int(tcp[j]), int(tcp[j+1]), int(tcp[j+2]), int(tcp[j+3]));
             }
           } else {
-            size_t palSize = MIN(pal.size(), 72);
+            size_t palSize = min(pal.size(), (size_t)72);    // WLEDMM use native min/max
             palSize -= palSize % 4; // make sure size is multiple of 4
             for (size_t i=0; i<palSize && pal[i].as<int>()<256; i+=4) {
               tcp[ i ] = (uint8_t) pal[ i ].as<int>(); // index
diff --git a/wled00/bus_manager.cpp b/wled00/bus_manager.cpp
index 08bb5128..8d698ae3 100644
--- a/wled00/bus_manager.cpp
+++ b/wled00/bus_manager.cpp
@@ -505,9 +505,9 @@ void BusManager::setStatusPixel(uint32_t c) {
 }
 
 void IRAM_ATTR BusManager::setPixelColor(uint16_t pix, uint32_t c, int16_t cct) {
-  for (uint8_t i = 0; i < numBusses; i++) {
+  for (uint_fast8_t i = 0; i < numBusses; i++) {    // WLEDMM use fast native types
     Bus* b = busses[i];
-    uint16_t bstart = b->getStart();
+    uint_fast16_t bstart = b->getStart();
     if (pix < bstart || pix >= bstart + b->getLength()) continue;
     busses[i]->setPixelColor(pix - bstart, c);
   }
@@ -528,10 +528,10 @@ void BusManager::setSegmentCCT(int16_t cct, bool allowWBCorrection) {
   Bus::setCCT(cct);
 }
 
-uint32_t BusManager::getPixelColor(uint16_t pix) {
-  for (uint8_t i = 0; i < numBusses; i++) {
+uint32_t BusManager::getPixelColor(uint_fast16_t pix) {     // WLEDMM use fast native types
+  for (uint_fast8_t i = 0; i < numBusses; i++) {
     Bus* b = busses[i];
-    uint16_t bstart = b->getStart();
+    uint_fast16_t bstart = b->getStart();
     if (pix < bstart || pix >= bstart + b->getLength()) continue;
     return b->getPixelColor(pix - bstart);
   }
@@ -552,8 +552,8 @@ Bus* BusManager::getBus(uint8_t busNr) {
 
 //semi-duplicate of strip.getLengthTotal() (though that just returns strip._length, calculated in finalizeInit())
 uint16_t BusManager::getTotalLength() {
-  uint16_t len = 0;
-  for (uint8_t i=0; i<numBusses; i++) len += busses[i]->getLength();
+  uint_fast16_t len = 0;
+  for (uint_fast8_t i=0; i<numBusses; i++) len += busses[i]->getLength();      // WLEDMM use fast native types
   return len;
 }
 
diff --git a/wled00/bus_manager.h b/wled00/bus_manager.h
index ffb3bd14..136a338e 100644
--- a/wled00/bus_manager.h
+++ b/wled00/bus_manager.h
@@ -341,7 +341,7 @@ class BusManager {
 
     void setSegmentCCT(int16_t cct, bool allowWBCorrection = false);
 
-    uint32_t getPixelColor(uint16_t pix);
+    uint32_t getPixelColor(uint_fast16_t pix);
 
     bool canAllShow();
 
diff --git a/wled00/colors.cpp b/wled00/colors.cpp
index 61457eba..a35c506e 100644
--- a/wled00/colors.cpp
+++ b/wled00/colors.cpp
@@ -7,11 +7,11 @@
 /*
  * color blend function
  */
-uint32_t color_blend(uint32_t color1, uint32_t color2, uint16_t blend, bool b16) {
+IRAM_ATTR_YN uint32_t color_blend(uint32_t color1, uint32_t color2, uint_fast16_t blend, bool b16) {
   if(blend == 0)   return color1;
-  uint16_t blendmax = b16 ? 0xFFFF : 0xFF;
+  uint_fast16_t blendmax = b16 ? 0xFFFF : 0xFF;
   if(blend == blendmax) return color2;
-  uint8_t shift = b16 ? 16 : 8;
+  uint_fast8_t shift = b16 ? 16 : 8;
 
   uint32_t w1 = W(color1);
   uint32_t r1 = R(color1);
@@ -41,7 +41,7 @@ uint32_t color_add(uint32_t c1, uint32_t c2)
   uint32_t g = G(c1) + G(c2);
   uint32_t b = B(c1) + B(c2);
   uint32_t w = W(c1) + W(c2);
-  uint16_t max = r;
+  uint_fast16_t max = r;
   if (g > max) max = g;
   if (b > max) max = b;
   if (w > max) max = w;
@@ -233,13 +233,13 @@ bool colorFromHexString(byte* rgb, const char* in) {
   return true;
 }
 
-float minf (float v, float w)
+static float minf (float v, float w)  // WLEDMM better use standard library fminf()
 {
   if (w > v) return v;
   return w;
 }
 
-float maxf (float v, float w)
+static float maxf (float v, float w)  // WLEDMM better use standard library fmaxf()
 {
   if (w > v) return w;
   return v;
diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h
index 519ea5a6..525bbc7d 100644
--- a/wled00/fcn_declare.h
+++ b/wled00/fcn_declare.h
@@ -50,8 +50,8 @@ bool getJsonValue(const JsonVariant& element, DestType& destination, const Defau
 
 
 //colors.cpp
-uint32_t color_blend(uint32_t,uint32_t,uint16_t,bool b16=false);
-uint32_t color_add(uint32_t,uint32_t);
+uint32_t __attribute__((const)) color_blend(uint32_t,uint32_t,uint_fast16_t,bool b16=false);  // WLEDMM: added attribute const
+uint32_t  __attribute__((const)) color_add(uint32_t,uint32_t);                                // WLEDMM: added attribute const
 inline uint32_t colorFromRgbw(byte* rgbw) { return uint32_t((byte(rgbw[3]) << 24) | (byte(rgbw[0]) << 16) | (byte(rgbw[1]) << 8) | (byte(rgbw[2]))); }
 void colorHStoRGB(uint16_t hue, byte sat, byte* rgb); //hue, sat to rgb
 void colorKtoRGB(uint16_t kelvin, byte* rgb);
@@ -61,12 +61,12 @@ void colorRGBtoXY(byte* rgb, float* xy); // only defined if huesync disabled TOD
 void colorFromDecOrHexString(byte* rgb, char* in);
 bool colorFromHexString(byte* rgb, const char* in);
 uint32_t colorBalanceFromKelvin(uint16_t kelvin, uint32_t rgb);
-uint16_t approximateKelvinFromRGB(uint32_t rgb);
+uint16_t __attribute__((const)) approximateKelvinFromRGB(uint32_t rgb);                       // WLEDMM: added attribute const
 void setRandomColor(byte* rgb);
 uint8_t gamma8_cal(uint8_t b, float gamma);
 void calcGammaTable(float gamma);
-uint8_t gamma8(uint8_t b);
-uint32_t gamma32(uint32_t);
+uint8_t __attribute__((pure)) gamma8(uint8_t b);                                              // WLEDMM: added attribute pure
+uint32_t __attribute__((pure)) gamma32(uint32_t);                                             // WLEDMM: added attribute pure
 
 //dmx.cpp
 void initDMX();
@@ -162,7 +162,7 @@ void stateUpdated(byte callMode);
 void updateInterfaces(uint8_t callMode);
 void handleTransitions();
 void handleNightlight();
-byte scaledBri(byte in);
+byte __attribute__((pure)) scaledBri(byte in);                     // WLEDMM: added attribute pure
 
 //lx_parser.cpp
 bool parseLx(int lxValue, byte* rgbw);
@@ -348,7 +348,7 @@ void releaseJSONBufferLock();
 uint8_t extractModeName(uint8_t mode, const char *src, char *dest, uint8_t maxLen);
 uint8_t extractModeSlider(uint8_t mode, uint8_t slider, char *dest, uint8_t maxLen, uint8_t *var = nullptr);
 int16_t extractModeDefaults(uint8_t mode, const char *segVar);
-uint16_t crc16(const unsigned char* data_p, size_t length);
+uint16_t  __attribute__((pure)) crc16(const unsigned char* data_p, size_t length);   // WLEDMM: added attribute pure
 um_data_t* simulateSound(uint8_t simulationId);
 // WLEDMM enumerateLedmaps(); moved to FX.h
 CRGB getCRGBForBand(int x, uint8_t *fftResult, int pal); //WLEDMM netmindz ar palette
diff --git a/wled00/led.cpp b/wled00/led.cpp
index 7e85c36a..53a252a1 100644
--- a/wled00/led.cpp
+++ b/wled00/led.cpp
@@ -293,7 +293,7 @@ void handleNightlight()
 }
 
 //utility for FastLED to use our custom timer
-uint32_t get_millisecond_timer()
+uint32_t __attribute__((pure)) get_millisecond_timer() // WLEDMM attribute pure = does not write other memory
 {
   return strip.now;
 }
diff --git a/wled00/wled.h b/wled00/wled.h
index 44e51e53..5d2c15fc 100644
--- a/wled00/wled.h
+++ b/wled00/wled.h
@@ -8,7 +8,7 @@
  */
 
 // version code in format yymmddb (b = daily build)
-#define VERSION 2304212
+#define VERSION 2304300
 
 //uncomment this if you have a "my_config.h" file you'd like to use
 //#define WLED_USE_MY_CONFIG
diff --git a/wled00/ws.cpp b/wled00/ws.cpp
index efa9601d..d6842f86 100644
--- a/wled00/ws.cpp
+++ b/wled00/ws.cpp
@@ -5,8 +5,8 @@
  */
 #ifdef WLED_ENABLE_WEBSOCKETS
 
-uint16_t wsLiveClientId = 0;
-unsigned long wsLastLiveTime = 0;
+static volatile uint16_t wsLiveClientId = 0;        // WLEDMM added "static"
+static volatile unsigned long wsLastLiveTime = 0;   // WLEDMM
 //uint8_t* wsFrameBuffer = nullptr;
 
 #define WS_LIVE_INTERVAL 40
@@ -153,16 +153,20 @@ void sendDataWs(AsyncWebSocketClient * client)
   releaseJSONBufferLock();
 }
 
-bool sendLiveLedsWs(uint32_t wsClient)
+static bool sendLiveLedsWs(uint32_t wsClient)  // WLEDMM added "static"
 {
   AsyncWebSocketClient * wsc = ws.client(wsClient);
   if (!wsc || wsc->queueLength() > 0) return false; //only send if queue free
 
   size_t used = strip.getLengthTotal();
 #ifdef ESP8266
-  const size_t MAX_LIVE_LEDS_WS = 256U;
+  constexpr size_t MAX_LIVE_LEDS_WS = 256U;
 #else
-  const size_t MAX_LIVE_LEDS_WS = 4096U;  //WLEDMM use 4096 as max matrix size
+ #if !defined(WLEDMM_FASTPATH)
+  constexpr size_t MAX_LIVE_LEDS_WS = 4096U;  //WLEDMM use 4096 as max matrix size
+ #else
+  constexpr size_t MAX_LIVE_LEDS_WS = 2048U;  //WLEDMM use 2048 as max matrix size - reduce "effect hiccups" due to long transmissions
+ #endif
 #endif
   size_t n = ((used -1)/MAX_LIVE_LEDS_WS) +1; //only serve every n'th LED if count over MAX_LIVE_LEDS_WS
   size_t pos = (strip.isMatrix ? 4 : 2);