From 7078556f9d055cb46339d436add2a03cc8abbc71 Mon Sep 17 00:00:00 2001
From: Lars Henriksen <LarsHenriksen@get2net.dk>
Date: Mon, 26 Mar 2018 18:44:08 +0200
Subject: Key bindings for UTF-8 encoded characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Internally characters (keys) have two representations: integers and key
names. Key names are character strings, usually the name of the
character; e.g., the character A has the representations 65 and "A", and
the tab character the representations 9 and "TAB".

The function keys_int2str() turns the integer representation of a
key/character into the key name.

For display purposes the key names are usually limited to a display
width of at most three. Some curses pseudo-keys have longer key names;
e.g., the back-tab character is "KEY_BTAB". A long key name makes a
character difficult to recognize in the status bar menu.

The key name of a multibyte, UTF-8 encoded character is the conventional
Unicode name of the code point; e.g., the character ü has key name
"U+00FC" because ü is the code point 0xFC. Most of these look alike in
the status bar menu.

The patch makes the key name of a multibyte character look like that of
a singlebyte character: the character itself, i.e. the key name of the
character ü is "ü".

The main tool is the implementation of a utf8_encode() routine.

Signed-off-by: Lukas Fleischer <lfleischer@calcurse.org>
---
 src/calcurse.h |  1 +
 src/custom.c   |  2 +-
 src/keys.c     | 18 +++++++-----------
 src/utf8.c     | 39 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/src/calcurse.h b/src/calcurse.h
index c362055..0d77e8f 100644
--- a/src/calcurse.h
+++ b/src/calcurse.h
@@ -1119,6 +1119,7 @@ int utf8_decode(const char *);
 int utf8_width(char *);
 int utf8_strwidth(char *);
 int utf8_chop(char *, int);
+char *utf8_encode(int);
 
 /* utils.c */
 void exit_calcurse(int) __attribute__ ((__noreturn__));
diff --git a/src/custom.c b/src/custom.c
index 246af1d..9443365 100644
--- a/src/custom.c
+++ b/src/custom.c
@@ -908,7 +908,7 @@ print_keys_bindings(WINDOW * win, int selected_row, int selected_elm,
 						mvwprintw(win, y, pos,
 							  "%s ", key);
 					noelm++;
-					pos += strlen(key) + 1;
+					pos += utf8_strwidth((char *)key) + 1;
 				}
 			} else {
 				mvwaddstr(win, y, KEYPOS,
diff --git a/src/keys.c b/src/keys.c
index e609cb5..36bd64c 100644
--- a/src/keys.c
+++ b/src/keys.c
@@ -395,9 +395,6 @@ int keys_str2int(const char *key)
 	else if (strcmp(key, "KEY_END") == 0)
 		return KEY_END;
 
-	/* UTF-8 multibyte keys. */
-	if (starts_with(key, "U+"))
-		return strtol(&key[strlen("U+")], NULL, 16) + KEY_MAX;
 
 	/* Lookup in the keynames table. */
 	for (int i = 1; i < 128; i++)
@@ -407,8 +404,8 @@ int keys_str2int(const char *key)
 		if (strcmp(key, keynames[i]) == 0)
 			return i;
 
-
-	return -1;
+	/* UTF-8 multibyte keys. */
+	return utf8_decode(key) + KEY_MAX;
 }
 
 char *keys_int2str(int key)
@@ -421,7 +418,7 @@ char *keys_int2str(int key)
 		else
 			return mem_strdup(keynames[key]);
 	} else {
-		asprintf(&res, "U+%04X", key - KEY_MAX);
+		asprintf(&res, "%s", utf8_encode(key - KEY_MAX));
 		return res;
 	}
 }
@@ -476,7 +473,7 @@ char *keys_action_allkeys(enum key action)
 static char *keys_format_label(char *key, int keylen)
 {
 	static char fmtkey[BUFSIZ];
-	const int len = strlen(key);
+	const int len = utf8_strwidth(key);
 	const char dot = '.';
 	int i;
 
@@ -520,7 +517,7 @@ keys_display_bindings_bar(WINDOW * win, int *bindings, int count,
 		const int label_pos_x = key_pos_x + KEYS_KEYLEN + 1;
 		const int label_pos_y = key_pos_y;
 
-		char key[KEYS_KEYLEN + 1], *fmtkey;
+		char key[UTF8_MAXLEN + 1], *fmtkey;
 
 		int binding_key;
 
@@ -532,9 +529,8 @@ keys_display_bindings_bar(WINDOW * win, int *bindings, int count,
 		const char *label;
 
 		if (binding_key < NBKEYS) {
-			strncpy(key, keys_action_firstkey(binding_key),
-				KEYS_KEYLEN);
-			key[KEYS_KEYLEN] = '\0';
+			strncpy(key, keys_action_firstkey(binding_key), UTF8_MAXLEN);
+			key[UTF8_MAXLEN] = '\0';
 			label = gettext(keydef[binding_key].sb_label);
 		} else {
 			switch (binding_key) {
diff --git a/src/utf8.c b/src/utf8.c
index 6b04331..b1976af 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -291,7 +291,44 @@ int utf8_decode(const char *s)
 	}
 }
 
-/* Get the width of a UTF-8 character. */
+/*
+ * Encode the Unicode code point u as a NUL-terminated UTF-8 byte sequence.
+ * Return a static buffer (overwritten by each call), or NULL if u >= 0x110000.
+ */
+char *utf8_encode(int u)
+{
+	static char c[5]; /* 4 bytes + string termination; reused by every call */
+
+	/* 0x0000 - 0x007F: 0xxxxxxx (NOTE(review): negative u is not rejected here) */
+	if (u < 0x80) {
+		*(c + 1) = '\0';
+		*c = u;
+	/* 0x0080 - 0x07FF: 110xxxxx 10xxxxxx */
+	} else if (u < 0x800) {
+		*(c + 2) = '\0';
+		*(c + 1) = (u       & 0x3F) | 0x80;
+		*c       = (u >> 6)         | 0xC0;
+	/* 0x0800 - 0xFFFF: 1110xxxx 10xxxxxx 10xxxxxx (surrogates are not excluded) */
+	} else if (u < 0x10000) {
+		*(c + 3) = '\0';
+		*(c + 2) = (u       & 0x3F) | 0x80;
+		*(c + 1) = (u >> 6  & 0x3F) | 0x80;
+		*c       = (u >> 12)        | 0xE0;
+	} else if (u < 0x110000) {
+	/* 0x10000 - 0x10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+		*(c + 4) = '\0';
+		*(c + 3) = (u       & 0x3F) | 0x80;
+		*(c + 2) = (u >> 6  & 0x3F) | 0x80;
+		*(c + 1) = (u >> 12 & 0x3F) | 0x80;
+		*c       = (u >> 18)        | 0xF0;
+	} else {
+		return NULL;
+	}
+
+	return c;
+}
+
+/* Get the display width of a UTF-8 character. */
 int utf8_width(char *s)
 {
 	int val, low, high, cur;
-- 
cgit v1.2.3-70-g09d2