From 410c3e45464ecd78c72848d44440b7b83651eae1 Mon Sep 17 00:00:00 2001 From: Matthew Mondor Date: Wed, 13 Apr 2022 13:21:52 +0000 Subject: [PATCH] Unicode finally working, so does dialog(1) under tmux(1) that remaps DEC Special Graphics to Unicode. --- hacks/analogterm-main.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/hacks/analogterm-main.c b/hacks/analogterm-main.c index 16bb5b9..7c68611 100644 --- a/hacks/analogterm-main.c +++ b/hacks/analogterm-main.c @@ -173,7 +173,19 @@ unsupported(struct terminal_controller_data *state, int statei, char statec, } static void -at_ascii_printc (analogterm_state_t *st, uint32_t c, uint16_t m, +at_ascii_printc (analogterm_state_t *st, unsigned char c, uint16_t m, + Bool scroll_p) +{ + + c &= 0x7f; /* XXX Since it's really for ASCII strip high bit */ + if (scroll_p) + at_printc(st, c, m); + else + at_printc_noscroll(st, c, m); +} + +static void +at_unicode_printc (analogterm_state_t *st, uint32_t c, uint16_t m, Bool scroll_p) { @@ -220,6 +232,8 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state, int i, many; int start, end; + uint32_t uc = (uint32_t)-1; + analogterm_state_t *st=sim->st; /* Mostly duplicated in phosphor.c */ @@ -350,7 +364,11 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state, } at_goto(st, state->cursor_y, state->cursor_x); /* clips range */ - at_ascii_printc (st, c, state->mode, False); + if (uc != (uint32_t)-1) { + at_unicode_printc(st, uc, state->mode, False); + uc = (uint32_t)-1; + } else + at_ascii_printc(st, c, state->mode, False); state->cursor_x++; break; @@ -419,6 +437,13 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state, We can just ignore this and always process UTF-8, I think? We must still catch the last byte, though. + XXX The current decoder somewhat maps to latin-1 but without + preserving accents. 
I could use my custom utf-8 decoder that + also maps invalid glyphs to latin-1... After all, we're not then + using that for output except to the screen we render and the goal + is to be able to display the maximum transparently without + bugging on invalid sequences. + Update: unicode finally works, see related comment below. */ case '(': /* FALLTHROUGH */ case ')': /* FALLTHROUGH */ /* XXX Links uses )0 without exit */ @@ -952,6 +977,8 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state, } if (state->unicruds >= total) { +#if 0 + /* XXX Old unicode to ASCII code */ /* Done! Convert it to ASCII and print that. */ char *s; state->unicrud[state->unicruds] = 0; @@ -965,6 +992,22 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state, } else { /* c = 0; */ } +#endif + /* + * XXX New UTF-8 to UTF-32 (UCS-4) unicode code. + * This could probably still be improved like merging composed + * characters and mapping invalid sequences as latin-1. + * But it's a great step forward: unicode works and tmux-remapped + * DEC Special Graphics to unicode also works. + */ + unsigned long luc, len; + len = utf8_decode((unsigned char *)state->unicrud, state->unicruds, + &luc); + if (len > 0) + uc = (uint32_t)luc; + state->unicruds = 0; + state->escstate = 0; + goto PRINT; } } break; -- 2.9.0