Update UTF-8 base code

UTF-8 encodes characters in one to four bytes (since RFC 3629, 2003), so the five- and six-byte decoding cases are removed. Because 0 is a valid code point, the decode function utf8_ord() should return -1, not 0, on error. As a consequence, utf8_width() must itself return 0 for a continuation byte (as it did previously).

Signed-off-by: Lukas Fleischer <lfleischer@calcurse.org>
author: Lars Henriksen <LarsHenriksen@get2net.dk> 2017-11-29 22:19:10 +0100
committer: Lukas Fleischer <lfleischer@calcurse.org> 2017-12-07 09:02:58 +0100
commit: 95c5d576fafa2f705e6562f57bab9a9d583c8776 (patch)
tree: 475b7ef3fb9831a094aec2eb562bef38a94be0f2 /src/utf8.c
parent: edc44d613bdc57566a48ea855af86a9df0b3d13d (diff)
download: calcurse-95c5d576fafa2f705e6562f57bab9a9d583c8776.tar.gz
calcurse-95c5d576fafa2f705e6562f57bab9a9d583c8776.zip
1 files changed, 8 insertions, 17 deletions
diff --git a/src/utf8.c b/src/utf8.c
index 47d83dc..e7754ae 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -269,11 +269,11 @@ static const struct utf8_range utf8_widthtab[] = {
 	{0xe0100, 0xe01ef, 0}
 };
 
-/* Decode a UTF-8 code point. */
+/* Decode a UTF-8 encoded character. Return the Unicode code point. */
 int utf8_ord(const char *s)
 {
 	if (UTF8_ISCONT(*s))
-		return 0;
+		return -1;
 
 	switch (UTF8_LENGTH(*s)) {
 	case 1:
@@ -285,17 +285,9 @@ int utf8_ord(const char *s)
 			(s[0] & 0x0f) << 12;
 	case 4:
 		return (((s[3] & 0x3f) | (s[2] & 0x3f) << 6) |
-			(s[1] & 0x3f) << 12) | (s[0] & 0x3f) << 18;
-	case 5:
-		return ((((s[4] & 0x3f) | (s[3] & 0x3f) << 6) |
-			(s[2] & 0x3f) << 12) | (s[1] & 0x3f) << 18) |
-			(s[0] & 0x3f) << 24;
-	case 6:
-		return (((((s[5] & 0x3f) | (s[4] & 0x3f) << 6) |
-			(s[3] & 0x3f) << 12) | (s[2] & 0x3f) << 18) |
-			(s[1] & 0x3f) << 24) | (s[0] & 0x3f) << 30;
+			(s[1] & 0x3f) << 12) | (s[0] & 0x7) << 18;
 	default:
-		return 0;
+		return -1;
 	}
 }
 
@@ -304,6 +296,8 @@ int utf8_width(char *s)
 {
 	int val, low, high, cur;
 
+	if (UTF8_ISCONT(*s))
+		return 0;
 	val = utf8_ord(s);
 	low = 0;
 	high = ARRAY_SIZE(utf8_widthtab);
@@ -328,11 +322,8 @@ int utf8_strwidth(char *s)
 {
 	int width = 0;
 
-	for (; s && *s; s++) {
-		if (!UTF8_ISCONT(*s))
-			width += utf8_width(s);
-	}
-
+	for (; *s; s++)
+		width += utf8_width(s);
 	return width;
 }
author	Lars Henriksen <LarsHenriksen@get2net.dk>	2017-11-29 22:19:10 +0100
committer	Lukas Fleischer <lfleischer@calcurse.org>	2017-12-07 09:02:58 +0100
commit	95c5d576fafa2f705e6562f57bab9a9d583c8776 (patch)
tree	475b7ef3fb9831a094aec2eb562bef38a94be0f2 /src/utf8.c
parent	edc44d613bdc57566a48ea855af86a9df0b3d13d (diff)
download	calcurse-95c5d576fafa2f705e6562f57bab9a9d583c8776.tar.gz calcurse-95c5d576fafa2f705e6562f57bab9a9d583c8776.zip