Changeset View
Changeset View
Standalone View
Standalone View
source/blender/blenlib/intern/string_utf8.c
| Show First 20 Lines • Show All 483 Lines • ▼ Show 20 Lines | int BLI_str_utf8_char_width_safe(const char *p) | ||||
| return (columns < 0) ? 1 : columns; | return (columns < 0) ? 1 : columns; | ||||
| } | } | ||||
| /* --------------------------------------------------------------------------*/ | /* --------------------------------------------------------------------------*/ | ||||
| /* copied from glib's gutf8.c, added 'Err' arg */ | /* copied from glib's gutf8.c, added 'Err' arg */ | ||||
| /* note, glib uses uint for unicode, best we do the same, | /* note, glib uses uint for unicode, best we do the same - campbell */ | ||||
| * though we don't typedef it - campbell */ | |||||
| #define UTF8_COMPUTE(Char, Mask, Len, Err) \ | #define UTF8_COMPUTE(Char, Mask, Len, Err) \ | ||||
| if (Char < 128) { \ | if (Char < 128) { \ | ||||
| Len = 1; \ | Len = 1; \ | ||||
| Mask = 0x7f; \ | Mask = 0x7f; \ | ||||
| } \ | } \ | ||||
| else if ((Char & 0xe0) == 0xc0) { \ | else if ((Char & 0xe0) == 0xc0) { \ | ||||
| Len = 2; \ | Len = 2; \ | ||||
| ▲ Show 20 Lines • Show All 219 Lines • ▼ Show 20 Lines | for (i = len - 1; i > 0; i--) { | ||||
| c >>= 6; | c >>= 6; | ||||
| } | } | ||||
| outbuf[0] = c | first; | outbuf[0] = c | first; | ||||
| } | } | ||||
| return len; | return len; | ||||
| } | } | ||||
| size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w, | |||||
| const char *__restrict src_c, | |||||
| const size_t maxncpy) | |||||
| { | |||||
| const size_t maxlen = maxncpy - 1; | |||||
| size_t len = 0; | |||||
| BLI_assert(maxncpy != 0); | |||||
| #ifdef DEBUG_STRSIZE | |||||
| memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy); | |||||
| #endif | |||||
| while (*src_c && len != maxlen) { | |||||
| size_t step = 0; | |||||
| uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step); | |||||
| if (unicode != BLI_UTF8_ERR) { | |||||
| *dst_w = unicode; | |||||
| src_c += step; | |||||
| } | |||||
| else { | |||||
| *dst_w = '?'; | |||||
| src_c = BLI_str_find_next_char_utf8(src_c, NULL); | |||||
| } | |||||
| dst_w++; | |||||
| len++; | |||||
| } | |||||
| *dst_w = 0; | |||||
| return len; | |||||
| } | |||||
| size_t BLI_str_utf32_as_utf8(char *__restrict dst, | |||||
| const char32_t *__restrict src, | |||||
| const size_t maxncpy) | |||||
| { | |||||
| const size_t maxlen = maxncpy - 1; | |||||
| /* 6 is max utf8 length of an unicode char. */ | |||||
| const int64_t maxlen_secured = (int64_t)maxlen - 6; | |||||
| size_t len = 0; | |||||
| BLI_assert(maxncpy != 0); | |||||
| #ifdef DEBUG_STRSIZE | |||||
| memset(dst, 0xff, sizeof(*dst) * maxncpy); | |||||
| #endif | |||||
| while (*src && len <= maxlen_secured) { | |||||
| len += BLI_str_utf8_from_unicode((uint)*src++, dst + len); | |||||
| } | |||||
| /* We have to be more careful for the last six bytes, | |||||
| * to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */ | |||||
| while (*src) { | |||||
| char t[6]; | |||||
| size_t l = BLI_str_utf8_from_unicode((uint)*src++, t); | |||||
| BLI_assert(l <= 6); | |||||
| if (len + l > maxlen) { | |||||
| break; | |||||
| } | |||||
| memcpy(dst + len, t, l); | |||||
| len += l; | |||||
| } | |||||
| dst[len] = '\0'; | |||||
| return len; | |||||
| } | |||||
| /* len of utf8 in unicode array */ | |||||
| size_t BLI_str_utf32_as_utf8_len(const char32_t *src) | |||||
| { | |||||
| size_t len = 0; | |||||
| while (*src) { | |||||
| len += BLI_str_utf8_from_unicode((uint)*src++, NULL); | |||||
| } | |||||
| return len; | |||||
| } | |||||
| /* was g_utf8_find_prev_char */ | /* was g_utf8_find_prev_char */ | ||||
| /** | /** | ||||
| * BLI_str_find_prev_char_utf8: | * BLI_str_find_prev_char_utf8: | ||||
| * \param str: pointer to the beginning of a UTF-8 encoded string | * \param str: pointer to the beginning of a UTF-8 encoded string | ||||
| * \param p: pointer to some position within \a str | * \param p: pointer to some position within \a str | ||||
| * | * | ||||
| * Given a position \a p with a UTF-8 encoded string \a str, find the start | * Given a position \a p with a UTF-8 encoded string \a str, find the start | ||||
| * of the previous UTF-8 character starting before \a p. Returns %NULL if no | * of the previous UTF-8 character starting before \a p. Returns %NULL if no | ||||
| ▲ Show 20 Lines • Show All 184 Lines • Show Last 20 Lines | |||||