decoder robustness improvements, and some nitpicks

2025-11-20 00:12:26 +01:00
parent 5597695865
commit 6a4716e490
2 changed files with 12 additions and 4 deletions
--- a/wled00/src/font/unicodetool.cpp
+++ b/wled00/src/font/unicodetool.cpp
@@ -53,7 +53,9 @@ const unsigned char* nextUnicode(const unsigned char* utf8, size_t maxLen) {
  if (length < 1) return nullptr;      // we are at end of input

  unsigned char ch0 = *utf8;           // get leading character
-  size_t codeLength = 1;               // default:  1-byte ASCII
+
+  // Calculate code length based on lead byte
+  size_t codeLength = 1;                                       // default:  1-byte ASCII
  if (ch0 >= 0x80) {
    if      ((ch0 & 0b11100000) == 0b11000000) codeLength = 2; // 2-byte sequence
    else if ((ch0 & 0b11110000) == 0b11100000) codeLength = 3; // 3-byte sequence
@@ -61,8 +63,13 @@ const unsigned char* nextUnicode(const unsigned char* utf8, size_t maxLen) {
    else codeLength = 1; // Skip single invalid byte and try to resync
  }

-  if (length < codeLength) return nullptr; // Check if we have enough bytes
-  else return utf8 + codeLength; // success: advance stream
+  // handle invalid continuation bytes
+  if ((codeLength >= 2) && (length < 2 || !isValidContinuation(utf8[1]))) codeLength = 1; // try to re-sync
+  if ((codeLength >= 3) && (length < 3 || !isValidContinuation(utf8[2]))) codeLength = 1; // try to re-sync
+  if ((codeLength >= 4) && (length < 4 || !isValidContinuation(utf8[3]))) codeLength = 1; // try to re-sync
+
+  if (length < codeLength) return nullptr;                     // Check if we have enough bytes
+  else return utf8 + codeLength;                               // success: advance stream
 }

 #endif