|
@@ -250,36 +250,72 @@ loader_malloc(uint64 size, char *error_buf, uint32 error_buf_size)
|
|
|
static bool
|
|
static bool
|
|
|
check_utf8_str(const uint8* str, uint32 len)
|
|
check_utf8_str(const uint8* str, uint32 len)
|
|
|
{
|
|
{
|
|
|
- const uint8 *p = str, *p_end = str + len, *p_end1;
|
|
|
|
|
- uint8 chr, n_bytes;
|
|
|
|
|
|
|
+ /* The valid ranges are taken from page 125, below link
|
|
|
|
|
+ https://www.unicode.org/versions/Unicode9.0.0/ch03.pdf */
|
|
|
|
|
+ const uint8 *p = str, *p_end = str + len;
|
|
|
|
|
+ uint8 chr;
|
|
|
|
|
|
|
|
while (p < p_end) {
|
|
while (p < p_end) {
|
|
|
- chr = *p++;
|
|
|
|
|
- if (chr >= 0x80) {
|
|
|
|
|
- /* Calculate the byte count: the first byte must be
|
|
|
|
|
- 110XXXXX, 1110XXXX, 11110XXX, 111110XX, or 1111110X,
|
|
|
|
|
- the count of leading '1' denotes the total byte count */
|
|
|
|
|
- n_bytes = 0;
|
|
|
|
|
- while ((chr & 0x80) != 0) {
|
|
|
|
|
- chr = (uint8)(chr << 1);
|
|
|
|
|
- n_bytes++;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- /* Check byte count */
|
|
|
|
|
- if (n_bytes < 2 || n_bytes > 6
|
|
|
|
|
- || p + n_bytes - 1 > p_end)
|
|
|
|
|
|
|
+ chr = *p;
|
|
|
|
|
+ if (chr < 0x80) {
|
|
|
|
|
+ p++;
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (chr >= 0xC2 && chr <= 0xDF && p + 1 < p_end) {
|
|
|
|
|
+ if (p[1] < 0x80 || p[1] > 0xBF) {
|
|
|
return false;
|
|
return false;
|
|
|
-
|
|
|
|
|
- /* Check the following bytes, which must be 10XXXXXX */
|
|
|
|
|
- p_end1 = p + n_bytes - 1;
|
|
|
|
|
- while (p < p_end1) {
|
|
|
|
|
- if (!(*p & 0x80) || (*p | 0x40))
|
|
|
|
|
|
|
+ }
|
|
|
|
|
+ p += 2;
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (chr >= 0xE0 && chr <= 0xEF && p + 2 < p_end) {
|
|
|
|
|
+ if (chr == 0xE0) {
|
|
|
|
|
+ if (p[1] < 0xA0 || p[1] > 0xBF
|
|
|
|
|
+ || p[2] < 0x80 || p[2] > 0xBF) {
|
|
|
return false;
|
|
return false;
|
|
|
- p++;
|
|
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (chr == 0xED) {
|
|
|
|
|
+ if (p[1] < 0x80 || p[1] > 0x9F
|
|
|
|
|
+ || p[2] < 0x80 || p[2] > 0xBF) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (chr >= 0xE1 && chr <= 0xEF) {
|
|
|
|
|
+ if (p[1] < 0x80 || p[1] > 0xBF
|
|
|
|
|
+ || p[2] < 0x80 || p[2] > 0xBF) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
+ p += 3;
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (chr >= 0xF0 && chr <= 0xF4 && p + 3 < p_end) {
|
|
|
|
|
+ if (chr == 0xF0) {
|
|
|
|
|
+ if (p[1] < 0x90 || p[1] > 0xBF
|
|
|
|
|
+ || p[2] < 0x80 || p[2] > 0xBF
|
|
|
|
|
+ || p[3] < 0x80 || p[3] > 0xBF) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (chr >= 0xF1 && chr <= 0xF3) {
|
|
|
|
|
+ if (p[1] < 0x80 || p[1] > 0xBF
|
|
|
|
|
+ || p[2] < 0x80 || p[2] > 0xBF
|
|
|
|
|
+ || p[3] < 0x80 || p[3] > 0xBF) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (chr == 0xF4) {
|
|
|
|
|
+ if (p[1] < 0x80 || p[1] > 0x8F
|
|
|
|
|
+ || p[2] < 0x80 || p[2] > 0xBF
|
|
|
|
|
+ || p[3] < 0x80 || p[3] > 0xBF) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ p += 4;
|
|
|
|
|
+ }
|
|
|
|
|
+ else {
|
|
|
|
|
+ return false;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
- return true;
|
|
|
|
|
|
|
+ return (p == p_end);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
static char*
|
|
static char*
|