Changeset View
Changeset View
Standalone View
Standalone View
extern/wcwidth/wcwidth.c
| Show First 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | |||||
| * | * | ||||
| * Permission to use, copy, modify, and distribute this software | * Permission to use, copy, modify, and distribute this software | ||||
| * for any purpose and without fee is hereby granted. The author | * for any purpose and without fee is hereby granted. The author | ||||
| * disclaims all warranties with regard to this software. | * disclaims all warranties with regard to this software. | ||||
| * | * | ||||
| * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c | * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c | ||||
| */ | */ | ||||
| #include <wchar.h> | |||||
| #include "wcwidth.h" | #include "wcwidth.h" | ||||
| struct interval { | struct interval { | ||||
| int first; | int first; | ||||
| int last; | int last; | ||||
| }; | }; | ||||
| /* auxiliary function for binary search in interval table */ | /* auxiliary function for binary search in interval table */ | ||||
| static int bisearch(wchar_t ucs, const struct interval *table, int max) { | static int bisearch(char32_t ucs, const struct interval *table, int max) { | ||||
| int min = 0; | int min = 0; | ||||
| int mid; | int mid; | ||||
| if (ucs < table[0].first || ucs > table[max].last) | if (ucs < table[0].first || ucs > table[max].last) | ||||
| return 0; | return 0; | ||||
| while (max >= min) { | while (max >= min) { | ||||
| mid = (min + max) / 2; | mid = (min + max) / 2; | ||||
| if (ucs > table[mid].last) | if (ucs > table[mid].last) | ||||
| Show All 35 Lines | |||||
| * - All remaining characters (including all printable | * - All remaining characters (including all printable | ||||
| * ISO 8859-1 and WGL4 characters, Unicode control characters, | * ISO 8859-1 and WGL4 characters, Unicode control characters, | ||||
| * etc.) have a column width of 1. | * etc.) have a column width of 1. | ||||
| * | * | ||||
| * This implementation assumes that wchar_t characters are encoded | * This implementation assumes that wchar_t characters are encoded | ||||
| * in ISO 10646. | * in ISO 10646. | ||||
| */ | */ | ||||
| int mk_wcwidth(wchar_t ucs) | int mk_wcwidth(char32_t ucs) | ||||
| { | { | ||||
| /* sorted list of non-overlapping intervals of non-spacing characters */ | /* sorted list of non-overlapping intervals of non-spacing characters */ | ||||
| /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ | /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ | ||||
| static const struct interval combining[] = { | static const struct interval combining[] = { | ||||
| { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, | { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, | ||||
| { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, | { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, | ||||
| { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, | { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, | ||||
| { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, | { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, | ||||
| ▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | (ucs <= 0x115f || /* Hangul Jamo init. consonants */ | ||||
| (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ | (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ | ||||
| (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */ | (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */ | ||||
| (ucs >= 0xffe0 && ucs <= 0xffe6) || | (ucs >= 0xffe0 && ucs <= 0xffe6) || | ||||
| (ucs >= 0x20000 && ucs <= 0x2fffd) || | (ucs >= 0x20000 && ucs <= 0x2fffd) || | ||||
| (ucs >= 0x30000 && ucs <= 0x3fffd))); | (ucs >= 0x30000 && ucs <= 0x3fffd))); | ||||
| } | } | ||||
| int mk_wcswidth(const wchar_t *pwcs, size_t n) | int mk_wcswidth(const char32_t *pwcs, size_t n) | ||||
| { | { | ||||
| int w, width = 0; | int w, width = 0; | ||||
| for (;*pwcs && n-- > 0; pwcs++) | for (;*pwcs && n-- > 0; pwcs++) | ||||
| if ((w = mk_wcwidth(*pwcs)) < 0) | if ((w = mk_wcwidth(*pwcs)) < 0) | ||||
| return -1; | return -1; | ||||
| else | else | ||||
| width += w; | width += w; | ||||
| return width; | return width; | ||||
| } | } | ||||
| /* | /* | ||||
| * The following functions are the same as mk_wcwidth() and | * The following functions are the same as mk_wcwidth() and | ||||
| * mk_wcswidth(), except that spacing characters in the East Asian | * mk_wcswidth(), except that spacing characters in the East Asian | ||||
| * Ambiguous (A) category as defined in Unicode Technical Report #11 | * Ambiguous (A) category as defined in Unicode Technical Report #11 | ||||
| * have a column width of 2. This variant might be useful for users of | * have a column width of 2. This variant might be useful for users of | ||||
| * CJK legacy encodings who want to migrate to UCS without changing | * CJK legacy encodings who want to migrate to UCS without changing | ||||
| * the traditional terminal character-width behaviour. It is not | * the traditional terminal character-width behaviour. It is not | ||||
| * otherwise recommended for general use. | * otherwise recommended for general use. | ||||
| */ | */ | ||||
| int mk_wcwidth_cjk(wchar_t ucs) | int mk_wcwidth_cjk(char32_t ucs) | ||||
| { | { | ||||
| /* sorted list of non-overlapping intervals of East Asian Ambiguous | /* sorted list of non-overlapping intervals of East Asian Ambiguous | ||||
| * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */ | * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */ | ||||
| static const struct interval ambiguous[] = { | static const struct interval ambiguous[] = { | ||||
| { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 }, | { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 }, | ||||
| { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 }, | { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 }, | ||||
| { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 }, | { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 }, | ||||
| { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 }, | { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 }, | ||||
| ▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | int mk_wcwidth_cjk(char32_t ucs) | ||||
| if (bisearch(ucs, ambiguous, | if (bisearch(ucs, ambiguous, | ||||
| sizeof(ambiguous) / sizeof(struct interval) - 1)) | sizeof(ambiguous) / sizeof(struct interval) - 1)) | ||||
| return 2; | return 2; | ||||
| return mk_wcwidth(ucs); | return mk_wcwidth(ucs); | ||||
| } | } | ||||
| int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n) | int mk_wcswidth_cjk(const char32_t *pwcs, size_t n) | ||||
| { | { | ||||
| int w, width = 0; | int w, width = 0; | ||||
| for (;*pwcs && n-- > 0; pwcs++) | for (;*pwcs && n-- > 0; pwcs++) | ||||
| if ((w = mk_wcwidth_cjk(*pwcs)) < 0) | if ((w = mk_wcwidth_cjk(*pwcs)) < 0) | ||||
| return -1; | return -1; | ||||
| else | else | ||||
| width += w; | width += w; | ||||
| return width; | return width; | ||||
| } | } | ||||