explanations for CP437 translations, some cleanups, bugfix for drawString

* explanation for CP437 glyph groups
* translation for MoonModules symbol
* "smiley" replacement for 4-byte and overlong Unicode codes
* always compile unicodetool.cpp (codepage translation still depends on WLED_ENABLE_UNICODE)
* bugfix: DrawString now skips over glyphs that would be rejected by DrawCharacter
* minor cleanup
This commit is contained in:
Frank
2025-11-21 13:48:38 +01:00
parent 527acefdea
commit 213cd185b5
4 changed files with 63 additions and 30 deletions

View File

@@ -1,4 +1,9 @@
#if defined(WLED_ENABLE_FULL_FONTS)
/*
@title WLED(-MM) - unicode helper functions
@repo https://github.com/MoonModules/WLED-MM, https://github.com/wled/WLED
@Copyright © 2025 Github WLED and WLED-MM Commit Authors (see "git blame" for details)
@license Licensed under the EUPL-1.2 or later
*/
#include "codepages.h"
#include <string.h>
@@ -35,13 +40,15 @@ uint16_t unicodeToWchar16(const unsigned char* utf8, size_t maxLen) {
if (length < 3 || !isValidContinuation(utf8[1]) || !isValidContinuation(utf8[2])) return BAD_CODE; // malformed
codepoint = ((ch0 & 0b00001111) << 12) | ((utf8[1] & 0b00111111) << 6) | (utf8[2] & 0b00111111);
if (codepoint < 0x800) return UNKNOWN_CODE; // Reject overlong encodings (must be >= 0x800)
if (codepoint >= 0xD800 && codepoint <= 0xDFFF) return UNKNOWN_CODE; // Reject UTF-16 surrogate pairs (U+D800..U+DFFF)
if (codepoint >= 0x010000) codepoint = UNKNOWN_CODE; // result exceeds uint16_t => "unknown"
if (codepoint >= 0xD800 && codepoint <= 0xDFFF) return EXT_CODE; // Reject UTF-16 surrogate pairs (U+D800..U+DFFF)
if (codepoint >= 0x010000) codepoint = EXT_CODE; // result exceeds uint16_t (should not happen with well-formed UTF-8)
return uint16_t(codepoint);
}
}
// 4-byte sequence or invalid lead byte - since we only support up to 0xFFFF, return error marker
return BAD_CODE; // unsupported/invalid
// since we only support up to 0xFFFF, return error marker
if ((ch0 & 0b11111000) == 0b11110000) return EXT_CODE; // unsupported 4-byte sequence
else return BAD_CODE; // other unsupported/invalid
}
// returns a pointer to the next unicode item - can be used to "advance" conversion after unicodeToWchar16()
@@ -99,5 +106,3 @@ size_t cutUnicodeAt(const unsigned char* utf8, size_t where) {
if (utf8[where] > 127) where = max(0, int(where)-1);
return where;
}
#endif