diff options
author | Lars Henriksen <LarsHenriksen@get2net.dk> | 2017-11-29 22:19:10 +0100 |
---|---|---|
committer | Lukas Fleischer <lfleischer@calcurse.org> | 2017-12-07 09:02:58 +0100 |
commit | 95c5d576fafa2f705e6562f57bab9a9d583c8776 (patch) | |
tree | 475b7ef3fb9831a094aec2eb562bef38a94be0f2 /src/utf8.c | |
parent | edc44d613bdc57566a48ea855af86a9df0b3d13d (diff) | |
download | calcurse-95c5d576fafa2f705e6562f57bab9a9d583c8776.tar.gz calcurse-95c5d576fafa2f705e6562f57bab9a9d583c8776.zip |
Update UTF-8 base code
UTF-8 encodes characters in one to four bytes (since 2003).
Because 0 is a valid code point, the decode function utf8_ord()
should return -1, not 0, on error. As a consequence utf8_width()
should return 0 for a continuation byte (as it did previously).
Signed-off-by: Lukas Fleischer <lfleischer@calcurse.org>
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- | src/utf8.c | 25 |
1 files changed, 8 insertions, 17 deletions
```diff
@@ -269,11 +269,11 @@ static const struct utf8_range utf8_widthtab[] = {
 	{0xe0100, 0xe01ef, 0}
 };
 
-/* Decode a UTF-8 code point. */
+/* Decode a UTF-8 encoded character. Return the Unicode code point. */
 int utf8_ord(const char *s)
 {
 	if (UTF8_ISCONT(*s))
-		return 0;
+		return -1;
 
 	switch (UTF8_LENGTH(*s)) {
 	case 1:
@@ -285,17 +285,9 @@ int utf8_ord(const char *s)
 			(s[0] & 0x0f) << 12;
 	case 4:
 		return (((s[3] & 0x3f) | (s[2] & 0x3f) << 6) |
-			(s[1] & 0x3f) << 12) | (s[0] & 0x3f) << 18;
-	case 5:
-		return ((((s[4] & 0x3f) | (s[3] & 0x3f) << 6) |
-			(s[2] & 0x3f) << 12) | (s[1] & 0x3f) << 18) |
-			(s[0] & 0x3f) << 24;
-	case 6:
-		return (((((s[5] & 0x3f) | (s[4] & 0x3f) << 6) |
-			(s[3] & 0x3f) << 12) | (s[2] & 0x3f) << 18) |
-			(s[1] & 0x3f) << 24) | (s[0] & 0x3f) << 30;
+			(s[1] & 0x3f) << 12) | (s[0] & 0x7) << 18;
 	default:
-		return 0;
+		return -1;
 	}
 }
 
@@ -304,6 +296,8 @@ int utf8_width(char *s)
 {
 	int val, low, high, cur;
 
+	if (UTF8_ISCONT(*s))
+		return 0;
 	val = utf8_ord(s);
 	low = 0;
 	high = ARRAY_SIZE(utf8_widthtab);
@@ -328,11 +322,8 @@ int utf8_strwidth(char *s)
 {
 	int width = 0;
 
-	for (; s && *s; s++) {
-		if (!UTF8_ISCONT(*s))
-			width += utf8_width(s);
-	}
-
+	for (; *s; s++)
+		width += utf8_width(s);
 	return width;
 }
 
```