truncate long segment names instead of discarding them

UX improvement: when a segment name is too long (esp. with Unicode), truncate it instead of deleting the name completely.
2025-11-20 22:33:42 +01:00
parent 2c3592c3f3
commit 2f48f03e44
3 changed files with 31 additions and 3 deletions
--- a/wled00/json.cpp
+++ b/wled00/json.cpp
@@ -2,6 +2,10 @@

 #include "palettes.h"

+#if defined(WLED_ENABLE_FULL_FONTS)
+#include "src/font/codepages.h"
+#endif
+
 #define JSON_PATH_STATE      1
 #define JSON_PATH_INFO       2
 #define JSON_PATH_STATE_INFO 3
@@ -145,11 +149,19 @@ bool deserializeSegment(JsonObject elem, byte it, byte presetId)
    const char * name = elem["n"].as<const char*>();
    size_t len = 0;
    if (name != nullptr) len = strlen(name);
-    if (len > 0 && len < 32) {  // ToDO: this is why long segment names silently get deleted - truncating would be better
+    if (len > 0) {
+      // WLEDMM: truncate segment name, instead of silently deleting
+      if (len > 32) { // ToDO: use WLED_MAX_SEGNAME_LEN
+        len = 32;     // cut to max segment name length
+        #if defined(WLED_ENABLE_FULL_FONTS)
+        if (name[len] > 127) // UTF-8 => don't cut in the middle of a multi-byte char
+          len = cutUnicodeAt((unsigned char*)name, len-1) +1; // +1 to convert between index and length
+        #endif
+        USER_PRINTF("Segment name too long (%d chars), truncated to \"%.*s\"\n", strlen(name), (int)len, name);
+      }
      seg.name = new(std::nothrow) char[len+1];
-      if (seg.name) strlcpy(seg.name, name, len+1);
+      if (seg.name) strlcpy(seg.name, name, len+1); // copies at most size-1 characters and always null-terminates
    } else {
-      if (len > 0) { USER_PRINTF("! too-long segment name \"%s\" (%d chars) dropped.\n", name, len);}
      // but is empty (already deleted above)
      elem.remove("n");
    }
--- a/wled00/src/font/codepages.h
+++ b/wled00/src/font/codepages.h
@@ -24,6 +24,10 @@ const unsigned char* nextUnicode(const unsigned char* utf8, size_t maxLen);    /
 // unicode-aware string length
 size_t strlenUC(const unsigned char* utf8);

+// the next (lesser) string index that is safe for cutting a UTF-8 string
+// Important: utf8[where] is read, so the caller must provide a string with at least _where_+1 bytes
+size_t cutUnicodeAt(const unsigned char* utf8, size_t where);
+
 // translates unicode 2-byte (UTF-16) "code point" into corresponding character in codepage 437 (IBM PC aka PC-8)
 uint16_t wchar16ToCodepage437(uint16_t wideChar);                              // codepage437.cpp

--- a/wled00/src/font/unicodetool.cpp
+++ b/wled00/src/font/unicodetool.cpp
@@ -83,4 +83,16 @@ size_t strlenUC(const unsigned char* utf8) {
  return letters;
 }

+// returns the index of the last byte to keep when cutting a UTF-8 string at or before _where_ (new length = result + 1)
+// Important: utf8[where] is read, so the caller must provide a string with at least _where_+1 bytes
+size_t cutUnicodeAt(const unsigned char* utf8, size_t where) {
+  if (utf8[where] <= 127) return where;  // ASCII byte: always safe to keep as the last byte
+  size_t loopMin = (where > 4) ? (where - 4) : 0; // look back at most 4 bytes, never below index 0
+  // UTF-8: step back over continuation bytes until a non-continuation byte (or the look-back limit) is reached
+  while ((isValidContinuation(utf8[where])) && (where > loopMin)) where--;
+  // still non-ASCII => most likely the lead byte of the cut character; drop it too, even if the scan started on it (else it is kept dangling)
+  if ((utf8[where] > 127) && (where > 0)) where--;
+  return where;
+}
+
 #endif