align error characters

Always use the "small dot" for an unsupported character and the "bigger dot" to indicate a Unicode error.
This commit is contained in:
Frank
2025-11-20 01:42:04 +01:00
parent c2b87f863f
commit 29e0afc0d7
2 changed files with 7 additions and 4 deletions

View File

@@ -3,6 +3,8 @@
#include "codepages.h"
#include <string.h>
// Fallback CP437 code returned for characters that have no mapping: 250, the small middle dot (·).
// Open question: would a blank (" ") be the better fallback?
constexpr uint8_t CP437_UNKNOWN = 250;
// translates unicode 2-byte (UTF-16) "code points" into corresponding characters in codepage 437 (IBM PC aka PC-8)
// based on a table from https://en.wikipedia.org/wiki/Code_page_437#Character_set
uint16_t wchar16ToCodepage437(uint16_t wideChar) {
@@ -45,7 +47,7 @@ uint16_t wchar16ToCodepage437(uint16_t wideChar) {
case 0x25BC: return 0x1F; break; // ▼
// characters 127 - 254
case 0x2302: return 0x7F; break; // ⌂
case 0x2302: return 0x7F; break; // ⌂ (aka DEL)
case 0x00C7: return 0x80; break; // Ç
case 0x00FC: return 0x81; break; // ü
case 0x00E9: return 0x82; break; // é
@@ -201,7 +203,7 @@ uint16_t wchar16ToCodepage437(uint16_t wideChar) {
// everything else: unknown
//default: return 32; // blank
default: return 250; // small middle dot ·
default: return CP437_UNKNOWN; // small middle dot ·
}
}

View File

@@ -3,8 +3,9 @@
#include <stdlib.h> // needed to get uint16_t definition
#include <stdint.h> // helps for code analysis with clang
constexpr uint16_t UNKNOWN_CODE = 0x2022; // •
constexpr uint16_t BAD_CODE = 0x2022; // •
//constexpr uint16_t UNKNOWN_CODE = 0x2219; // ∙ multiplication dot
// Placeholder code for an unsupported character: the small dot.
constexpr uint16_t UNKNOWN_CODE = 0x00B7; // · middle dot
// Placeholder code that indicates a Unicode error: the bigger dot.
constexpr uint16_t BAD_CODE = 0x2022; // • bigger dot
// UTF8 → reduced UTF16 decoding
// translates the next unicode UTF-8 item into a 2-byte "code point"