diff options
Diffstat (limited to 'src/common.c')
-rw-r--r-- | src/common.c | 183 |
1 files changed, 168 insertions, 15 deletions
diff --git a/src/common.c b/src/common.c index 38f51ea..03dd81c 100644 --- a/src/common.c +++ b/src/common.c @@ -176,19 +176,14 @@ ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read) uint32_t c, aux; size_t len; - if (bytes_read) { - (*bytes_read) = 0; - } - c = (*input)[0]; - LY_CHECK_RET(!c, LY_EINVAL); if (!(c & 0x80)) { /* one byte character */ len = 1; if ((c < 0x20) && (c != 0x9) && (c != 0xa) && (c != 0xd)) { - return LY_EINVAL; + goto error; } } else if ((c & 0xe0) == 0xc0) { /* two bytes character */ @@ -196,12 +191,12 @@ ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read) aux = (*input)[1]; if ((aux & 0xc0) != 0x80) { - return LY_EINVAL; + goto error; } c = ((c & 0x1f) << 6) | (aux & 0x3f); if (c < 0x80) { - return LY_EINVAL; + goto error; } } else if ((c & 0xf0) == 0xe0) { /* three bytes character */ @@ -211,14 +206,14 @@ ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read) for (uint64_t i = 1; i <= 2; i++) { aux = (*input)[i]; if ((aux & 0xc0) != 0x80) { - return LY_EINVAL; + goto error; } c = (c << 6) | (aux & 0x3f); } if ((c < 0x800) || ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) { - return LY_EINVAL; + goto error; } } else if ((c & 0xf8) == 0xf0) { /* four bytes character */ @@ -228,17 +223,17 @@ ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read) for (uint64_t i = 1; i <= 3; i++) { aux = (*input)[i]; if ((aux & 0xc0) != 0x80) { - return LY_EINVAL; + goto error; } c = (c << 6) | (aux & 0x3f); } if ((c < 0x1000) || (c > 0x10ffff)) { - return LY_EINVAL; + goto error; } } else { - return LY_EINVAL; + goto error; } (*utf8_char) = c; @@ -247,6 +242,163 @@ ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read) (*bytes_read) = len; } return LY_SUCCESS; + +error: + if (bytes_read) { + (*bytes_read) = 0; + } + return LY_EINVAL; +} + +/** + * @brief Check whether an UTF-8 string is equal to a hex string after a bitwise and. + * + * (input & 0x[arg1][arg3][arg5]...) == 0x[arg2][arg4][arg6]... + * + * @param[in] input UTF-8 string. + * @param[in] bytes Number of bytes to compare. + * @param[in] ... 2x @p bytes number of bytes to perform bitwise and and equality operations. + * @return Result of the operation. + */ +static int +ly_utf8_and_equal(const char *input, uint8_t bytes, ...) +{ + va_list ap; + int i, and, byte; + + va_start(ap, bytes); + for (i = 0; i < bytes; ++i) { + and = va_arg(ap, int); + byte = va_arg(ap, int); + + /* compare each byte */ + if (((uint8_t)input[i] & and) != (uint8_t)byte) { + return 0; + } + } + va_end(ap); + + return 1; +} + +/** + * @brief Check whether an UTF-8 string is smaller than a hex string. + * + * input < 0x[arg1][arg2]... + * + * @param[in] input UTF-8 string. + * @param[in] bytes Number of bytes to compare. + * @param[in] ... @p bytes number of bytes to compare with. + * @return Result of the operation. + */ +static int +ly_utf8_less(const char *input, uint8_t bytes, ...) +{ + va_list ap; + int i, byte; + + va_start(ap, bytes); + for (i = 0; i < bytes; ++i) { + byte = va_arg(ap, int); + + /* compare until bytes differ */ + if ((uint8_t)input[i] > (uint8_t)byte) { + return 0; + } else if ((uint8_t)input[i] < (uint8_t)byte) { + return 1; + } + } + va_end(ap); + + /* equals */ + return 0; +} + +/** + * @brief Check whether an UTF-8 string is greater than a hex string. + * + * input > 0x[arg1][arg2]... + * + * @param[in] input UTF-8 string. + * @param[in] bytes Number of bytes to compare. + * @param[in] ... @p bytes number of bytes to compare with. + * @return Result of the operation. + */ +static int +ly_utf8_greater(const char *input, uint8_t bytes, ...) +{ + va_list ap; + int i, byte; + + va_start(ap, bytes); + for (i = 0; i < bytes; ++i) { + byte = va_arg(ap, int); + + /* compare until bytes differ */ + if ((uint8_t)input[i] > (uint8_t)byte) { + return 1; + } else if ((uint8_t)input[i] < (uint8_t)byte) { + return 0; + } + } + va_end(ap); + + /* equals */ + return 0; +} + +LY_ERR +ly_checkutf8(const char *input, size_t in_len, size_t *utf8_len) +{ + size_t len; + + if (!(input[0] & 0x80)) { + /* one byte character */ + len = 1; + + if (ly_utf8_less(input, 1, 0x20) && (input[0] != 0x9) && (input[0] != 0xa) && (input[0] != 0xd)) { + /* invalid control characters */ + return LY_EINVAL; + } + } else if (((input[0] & 0xe0) == 0xc0) && (in_len > 1)) { + /* two bytes character */ + len = 2; + + /* (input < 0xC280) || (input > 0xDFBF) || ((input & 0xE0C0) != 0xC080) */ + if (ly_utf8_less(input, 2, 0xC2, 0x80) || ly_utf8_greater(input, 2, 0xDF, 0xBF) || + !ly_utf8_and_equal(input, 2, 0xE0, 0xC0, 0xC0, 0x80)) { + return LY_EINVAL; + } + } else if (((input[0] & 0xf0) == 0xe0) && (in_len > 2)) { + /* three bytes character */ + len = 3; + + /* (input >= 0xEDA080) && (input <= 0xEDBFBF) */ + if (!ly_utf8_less(input, 3, 0xED, 0xA0, 0x80) && !ly_utf8_greater(input, 3, 0xED, 0xBF, 0xBF)) { + /* reject UTF-16 surrogates */ + return LY_EINVAL; + } + + /* (input < 0xE0A080) || (input > 0xEFBFBF) || ((input & 0xF0C0C0) != 0xE08080) */ + if (ly_utf8_less(input, 3, 0xE0, 0xA0, 0x80) || ly_utf8_greater(input, 3, 0xEF, 0xBF, 0xBF) || + !ly_utf8_and_equal(input, 3, 0xF0, 0xE0, 0xC0, 0x80, 0xC0, 0x80)) { + return LY_EINVAL; + } + } else if (((input[0] & 0xf8) == 0xf0) && (in_len > 3)) { + /* four bytes character */ + len = 4; + + /* (input < 0xF0908080) || (input > 0xF48FBFBF) || ((input & 0xF8C0C0C0) != 0xF0808080) */ + if (ly_utf8_less(input, 4, 0xF0, 0x90, 0x80, 0x80) || ly_utf8_greater(input, 4, 0xF4, 0x8F, 0xBF, 0xBF) || + !ly_utf8_and_equal(input, 4, 0xF8, 0xF0, 0xC0, 0x80, 0xC0, 0x80, 0xC0, 0x80)) { + return LY_EINVAL; + } + } else { + return LY_EINVAL; + } + + *utf8_len = len; + return LY_SUCCESS; } LY_ERR @@ -258,6 +410,7 @@ ly_pututf8(char *dst, uint32_t value, size_t *bytes_written) (value != 0x09) && (value != 0x0a) && (value != 0x0d)) { + /* valid UTF8 but not YANG string character */ return LY_EINVAL; } @@ -337,10 +490,10 @@ ly_utf8len(const char *str, size_t bytes) return len; } -size_t +int LY_VCODE_INSTREXP_len(const char *str) { - size_t len = 0; + int len = 0; if (!str) { return len; |