Changeset View
Changeset View
Standalone View
Standalone View
source/blender/blenlib/intern/string_utf8.c
| Show First 20 Lines • Show All 513 Lines • ▼ Show 20 Lines | |||||
| * | * | ||||
| * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. | * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. | ||||
| * If \a p does not point to a valid UTF-8 encoded character, results are | * If \a p does not point to a valid UTF-8 encoded character, results are | ||||
| * undefined. If you are not sure that the bytes are complete | * undefined. If you are not sure that the bytes are complete | ||||
| * valid Unicode characters, you should use g_utf8_get_char_validated() | * valid Unicode characters, you should use g_utf8_get_char_validated() | ||||
| * instead. | * instead. | ||||
| * | * | ||||
| * Return value: the resulting character | * Return value: the resulting character | ||||
| **/ | */ | ||||
| uint BLI_str_utf8_as_unicode(const char *p) | uint BLI_str_utf8_as_unicode(const char *p) | ||||
| { | { | ||||
| int i, len; | int i, len; | ||||
| uint mask = 0; | uint mask = 0; | ||||
| uint result; | uint result; | ||||
| const unsigned char c = (unsigned char) *p; | const unsigned char c = (unsigned char) *p; | ||||
| UTF8_COMPUTE(c, mask, len, -1); | UTF8_COMPUTE(c, mask, len, -1); | ||||
| ▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines | |||||
| * \param c: a Unicode character code | * \param c: a Unicode character code | ||||
| * \param outbuf: output buffer, must have at least 6 bytes of space. | * \param outbuf: output buffer, must have at least 6 bytes of space. | ||||
| * If %NULL, the length will be computed and returned | * If %NULL, the length will be computed and returned | ||||
| * and nothing will be written to outbuf. | * and nothing will be written to outbuf. | ||||
| * | * | ||||
| * Converts a single character to UTF-8. | * Converts a single character to UTF-8. | ||||
| * | * | ||||
| * \return number of bytes written | * \return number of bytes written | ||||
| **/ | */ | ||||
| size_t BLI_str_utf8_from_unicode(uint c, char *outbuf) | size_t BLI_str_utf8_from_unicode(uint c, char *outbuf) | ||||
| { | { | ||||
| /* If this gets modified, also update the copy in g_string_insert_unichar() */ | /* If this gets modified, also update the copy in g_string_insert_unichar() */ | ||||
| uint len = 0; | uint len = 0; | ||||
| uint first; | uint first; | ||||
| uint i; | uint i; | ||||
| if (c < 0x80) { | if (c < 0x80) { | ||||
| ▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | |||||
| * of the previous UTF-8 character starting before \a p. Returns %NULL if no | * of the previous UTF-8 character starting before \a p. Returns %NULL if no | ||||
| * UTF-8 characters are present in \a str before \a p | * UTF-8 characters are present in \a str before \a p | ||||
| * | * | ||||
| * \a p does not have to be at the beginning of a UTF-8 character. No check | * \a p does not have to be at the beginning of a UTF-8 character. No check | ||||
| * is made to see if the character found is actually valid other than | * is made to see if the character found is actually valid other than | ||||
| * it starts with an appropriate byte. | * it starts with an appropriate byte. | ||||
| * | * | ||||
| * Return value: a pointer to the found character or %NULL. | * Return value: a pointer to the found character or %NULL. | ||||
| **/ | */ | ||||
char *BLI_str_find_prev_char_utf8(const char *str, const char *p)
{
	/* Scan backwards from the byte just before `p` towards `str`, looking for
	 * the first byte that is not a UTF-8 continuation byte (10xxxxxx).
	 *
	 * The bound is tested *before* decrementing so that a pointer below `str`
	 * is never formed: computing `str - 1` is undefined behavior in C, even if
	 * the result is only compared and never dereferenced. */
	while (p > str) {
		--p;
		if ((*p & 0xc0) != 0x80) {
			/* Lead byte or plain ASCII byte: start of a character. */
			return (char *)p;
		}
	}
	/* Either `p` was at (or before) `str`, or only continuation bytes
	 * were found in the range [str, p). */
	return NULL;
}
| /* was g_utf8_find_next_char */ | /* was g_utf8_find_next_char */ | ||||
| /** | /** | ||||
| * BLI_str_find_next_char_utf8: | * BLI_str_find_next_char_utf8: | ||||
| * \param p: a pointer to a position within a UTF-8 encoded string | * \param p: a pointer to a position within a UTF-8 encoded string | ||||
| * \param end: a pointer to the byte following the end of the string, | * \param end: a pointer to the byte following the end of the string, | ||||
| * or %NULL to indicate that the string is nul-terminated. | * or %NULL to indicate that the string is nul-terminated. | ||||
| * | * | ||||
| * Finds the start of the next UTF-8 character in the string after \a p | * Finds the start of the next UTF-8 character in the string after \a p | ||||
| * | * | ||||
| * \a p does not have to be at the beginning of a UTF-8 character. No check | * \a p does not have to be at the beginning of a UTF-8 character. No check | ||||
| * is made to see if the character found is actually valid other than | * is made to see if the character found is actually valid other than | ||||
| * it starts with an appropriate byte. | * it starts with an appropriate byte. | ||||
| * | * | ||||
| * Return value: a pointer to the found character or %NULL | * Return value: a pointer to the found character or %NULL | ||||
| **/ | */ | ||||
| char *BLI_str_find_next_char_utf8(const char *p, const char *end) | char *BLI_str_find_next_char_utf8(const char *p, const char *end) | ||||
| { | { | ||||
| if (*p) { | if (*p) { | ||||
| if (end) { | if (end) { | ||||
| for (++p; p < end && (*p & 0xc0) == 0x80; ++p) { | for (++p; p < end && (*p & 0xc0) == 0x80; ++p) { | ||||
| /* do nothing */ | /* do nothing */ | ||||
| } | } | ||||
| } | } | ||||
| Show All 14 Lines | |||||
| * Finds the previous UTF-8 character in the string before \a p | * Finds the previous UTF-8 character in the string before \a p | ||||
| * | * | ||||
| * \a p does not have to be at the beginning of a UTF-8 character. No check | * \a p does not have to be at the beginning of a UTF-8 character. No check | ||||
| * is made to see if the character found is actually valid other than | * is made to see if the character found is actually valid other than | ||||
| * it starts with an appropriate byte. If \a p might be the first | * it starts with an appropriate byte. If \a p might be the first | ||||
| * character of the string, you must use BLI_str_find_prev_char_utf8() instead. | * character of the string, you must use BLI_str_find_prev_char_utf8() instead. | ||||
| * | * | ||||
| * Return value: a pointer to the found character. | * Return value: a pointer to the found character. | ||||
| **/ | */ | ||||
| char *BLI_str_prev_char_utf8(const char *p) | char *BLI_str_prev_char_utf8(const char *p) | ||||
| { | { | ||||
| while (1) { | while (1) { | ||||
| p--; | p--; | ||||
| if ((*p & 0xc0) != 0x80) { | if ((*p & 0xc0) != 0x80) { | ||||
| return (char *)p; | return (char *)p; | ||||
| } | } | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 51 Lines • Show Last 20 Lines | |||||