- 95 static int DecodeUtf8Char(uint8 *data, size_t length, uint32 *codepoint)
- 96 {
- 97 if (length < 1) return 0;
- 98 uint32 value = *data;
- 99 data++;
- 100 if ((value & 0x80) == 0) {
- 101 *codepoint = value;
- 102 return 1;
- 103 }
- 104 int size;
- 105 uint32 min_value;
- 106 if ((value & 0xE0) == 0xC0) {
- 107 size = 2;
- 108 min_value = 0x80;
- 109 value &= 0x1F;
- 110 } else if ((value & 0xF0) == 0xE0) {
- 111 size = 3;
- 112 min_value = 0x800;
- 113 value &= 0x0F;
- 114 } else if ((value & 0xF8) == 0xF0) {
- 115 size = 4;
- 116 min_value = 0x10000;
- 117 value &= 0x07;
- 118 } else {
- 119 return 0;
- 120 }
- 121
- 122 if (length < static_cast<size_t>(size)) return 0;
- 123 for (int n = 1; n < size; n++) {
- 124 uint8 val = *data;
- 125 data++;
- 126 if ((val & 0xC0) != 0x80) return 0;
- 127 value = (value << 6) | (val & 0x3F);
- 128 }
- 129 if (value < min_value || (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) return 0;
- 130 *codepoint = value;
- 131 return size;
- 132 }