Unicode finally working, so does dialog(1) under tmux(1) that
authorMatthew Mondor <mmondor@pulsar-zone.net>
Wed, 13 Apr 2022 13:21:52 +0000 (13:21 +0000)
committerMatthew Mondor <mmondor@pulsar-zone.net>
Wed, 13 Apr 2022 13:21:52 +0000 (13:21 +0000)
remaps DEC Special Graphics to Unicode.

hacks/analogterm-main.c

index 16bb5b9..7c68611 100644 (file)
@@ -173,7 +173,19 @@ unsupported(struct terminal_controller_data *state, int statei, char statec,
 }
 
 static void
-at_ascii_printc (analogterm_state_t *st, uint32_t c, uint16_t m,
+at_ascii_printc (analogterm_state_t *st, unsigned char c, uint16_t m,
+                Bool scroll_p)
+{
+
+  c &= 0x7f;           /* XXX Since it's really for ASCII strip high bit */
+  if (scroll_p)
+    at_printc(st, c, m);
+  else
+    at_printc_noscroll(st, c, m);
+}
+
+static void
+at_unicode_printc (analogterm_state_t *st, uint32_t c, uint16_t m,
                 Bool scroll_p)
 {
 
@@ -220,6 +232,8 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state,
   int i, many;
   int start, end;
 
+  uint32_t uc = (uint32_t)-1;
+
   analogterm_state_t *st=sim->st;
 
   /* Mostly duplicated in phosphor.c */
@@ -350,7 +364,11 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state,
             }
 
           at_goto(st, state->cursor_y, state->cursor_x);  /* clips range */
-          at_ascii_printc (st, c, state->mode, False);
+         if (uc != (uint32_t)-1) {
+                 at_unicode_printc(st, uc, state->mode, False);
+                 uc = (uint32_t)-1;
+         } else
+                 at_ascii_printc(st, c, state->mode, False);
           state->cursor_x++;
 
           break;
@@ -419,6 +437,13 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state,
 
              We can just ignore this and always process UTF-8, I think?
              We must still catch the last byte, though.
+            XXX The current decoder somewhat maps to latin-1 but without
+            preserving accents.  I could use my custom utf-8 decoder that
+            also maps invalid glyphs to latin-1...  After all, we're not then
+            using that for output except to the screen we render and the goal
+            is to be able to display the maximum transparently without
+            bugging on invalid sequences.
+            Update: unicode finally works, see related comment below.
            */
         case '(': /* FALLTHROUGH */
        case ')': /* FALLTHROUGH */ /* XXX Links uses )0 without exit */
@@ -952,6 +977,8 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state,
         }
 
         if (state->unicruds >= total) {
+#if 0
+         /* XXX Old unicode to ASCII code */
           /* Done! Convert it to ASCII and print that. */
           char *s;
           state->unicrud[state->unicruds] = 0;
@@ -965,6 +992,22 @@ at_vt100_printc (analogterm_sim_t *sim, struct terminal_controller_data *state,
           } else {
             /* c = 0; */
           }
+#endif
+         /*
+          * XXX New UTF-8 to UCS-4 (UTF-32) Unicode code.
+          * This could probably still be improved, e.g. by merging composed
+          * characters and mapping invalid sequences to latin-1.
+          * But it's a great step forward: Unicode works and tmux-remapped
+          * DEC Special Graphics to Unicode also works.
+          */
+         unsigned long luc, len;
+         len = utf8_decode((unsigned char *)state->unicrud, state->unicruds,
+           &luc);
+         if (len > 0)
+                 uc = (uint32_t)luc;
+          state->unicruds = 0;
+          state->escstate = 0;
+         goto PRINT;
         }
       }
       break;