12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
- #include "utf8.h"
/**
 * Decode a single UTF-8 encoded code point from a NUL-terminated string.
 *
 * @param str         Pointer to the first byte of the sequence to decode.
 * @param code_point  Receives the decoded code point, or -1 if the bytes at
 *                    `str` do not form a well-formed UTF-8 sequence
 *                    (invalid lead byte, missing/invalid continuation byte,
 *                    overlong encoding, UTF-16 surrogate, or a value above
 *                    U+10FFFF).
 * @return Pointer to the byte at which decoding should resume. This is
 *         always at least `str + 1`, so a caller looping over a string
 *         makes progress even on malformed input. A NUL byte decodes as
 *         code point 0 and the returned pointer is `str + 1`; callers are
 *         expected to test for the terminator before calling.
 */
const char *decode_utf8(const char *str, int32_t *code_point) {
    /*
     * Inspect the bytes as unsigned values: plain `char` may be signed, in
     * which case every byte >= 0x80 is negative and comparisons such as
     * `byte < 0x80` or `byte <= 0xF4` would be true for all non-ASCII input.
     */
    const unsigned char *s = (const unsigned char *)str;
    const unsigned char lead = s[0];
    int32_t cp;
    int32_t min_cp; /* smallest value that legitimately needs `len` bytes */
    int len;

    if (lead < 0x80) {
        *code_point = lead;
        return str + 1;
    }

    if ((lead & 0xE0) == 0xC0) {
        len = 2;
        cp = lead & 0x1F;
        min_cp = 0x80;
    } else if ((lead & 0xF0) == 0xE0) {
        len = 3;
        cp = lead & 0x0F;
        min_cp = 0x800;
    } else if ((lead & 0xF8) == 0xF0 && lead <= 0xF4) {
        len = 4;
        cp = lead & 0x07;
        min_cp = 0x10000;
    } else {
        /* Stray continuation byte or a lead byte that UTF-8 never uses. */
        *code_point = -1;
        return str + 1;
    }

    for (int i = 1; i < len; i++) {
        /*
         * Each byte is validated before the following one is read, so a
         * sequence truncated by the NUL terminator never causes a read
         * past the end of the string. Resume at the offending byte: it
         * may be the terminator or the start of the next sequence.
         */
        if ((s[i] & 0xC0) != 0x80) {
            *code_point = -1;
            return str + i;
        }
        cp = (cp << 6) | (s[i] & 0x3F);
    }

    /* Reject overlong forms, UTF-16 surrogates and values past U+10FFFF. */
    if (cp < min_cp || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
        cp = -1;
    }

    *code_point = cp;
    return str + len;
}
|