From be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 04:57:58 +0200 Subject: Adding upstream version 1.44.3. Signed-off-by: Daniel Baumann --- web/server/h2o/libh2o/lib/common/string.c | 594 ++++++++++++++++++++++++++++++ 1 file changed, 594 insertions(+) create mode 100644 web/server/h2o/libh2o/lib/common/string.c (limited to 'web/server/h2o/libh2o/lib/common/string.c') diff --git a/web/server/h2o/libh2o/lib/common/string.c b/web/server/h2o/libh2o/lib/common/string.c new file mode 100644 index 00000000..3c068f3a --- /dev/null +++ b/web/server/h2o/libh2o/lib/common/string.c @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2014-2016 DeNA Co., Ltd., Kazuho Oku, Justin Zhu, Fastly, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include "h2o/string_.h" + +h2o_iovec_t h2o_strdup(h2o_mem_pool_t *pool, const char *s, size_t slen) +{ + h2o_iovec_t ret; + + if (slen == SIZE_MAX) + slen = strlen(s); + + if (pool != NULL) { + ret.base = h2o_mem_alloc_pool(pool, slen + 1); + } else { + ret.base = h2o_mem_alloc(slen + 1); + } + h2o_memcpy(ret.base, s, slen); + ret.base[slen] = '\0'; + ret.len = slen; + return ret; +} + +h2o_iovec_t h2o_strdup_shared(h2o_mem_pool_t *pool, const char *s, size_t slen) +{ + h2o_iovec_t ret; + + if (slen == SIZE_MAX) + slen = strlen(s); + + ret.base = h2o_mem_alloc_shared(pool, slen + 1, NULL); + memcpy(ret.base, s, slen); + ret.base[slen] = '\0'; + ret.len = slen; + return ret; +} + +h2o_iovec_t h2o_strdup_slashed(h2o_mem_pool_t *pool, const char *src, size_t len) +{ + h2o_iovec_t ret; + + ret.len = len != SIZE_MAX ? len : strlen(src); + ret.base = pool != NULL ? h2o_mem_alloc_pool(pool, ret.len + 2) : h2o_mem_alloc(ret.len + 2); + memcpy(ret.base, src, ret.len); + if (ret.len != 0 && ret.base[ret.len - 1] != '/') + ret.base[ret.len++] = '/'; + ret.base[ret.len] = '\0'; + + return ret; +} + +int h2o__lcstris_core(const char *target, const char *test, size_t test_len) +{ + for (; test_len != 0; --test_len) + if (h2o_tolower(*target++) != *test++) + return 0; + return 1; +} + +size_t h2o_strtosize(const char *s, size_t len) +{ + uint64_t v = 0, m = 1; + const char *p = s + len; + + if (len == 0) + goto Error; + + while (1) { + int ch = *--p; + if (!('0' <= ch && ch <= '9')) + goto Error; + v += (ch - '0') * m; + if (p == s) + break; + m *= 10; + /* do not even try to overflow */ + if (m == 10000000000000000000ULL) + goto Error; + } + + if (v >= SIZE_MAX) + goto Error; + return v; + +Error: + return SIZE_MAX; +} + +size_t h2o_strtosizefwd(char **s, size_t len) +{ + uint64_t v, c; + char *p = *s, *p_end = *s + len; + + if (len == 0) + goto Error; + + int ch = *p++; + if (!('0' <= ch && ch <= '9')) + goto Error; + v = ch - '0'; + c = 1; + + while (1) { + ch = *p; + if (!('0' <= ch && ch <= '9')) + break; + v *= 10; + v += ch - '0'; + p++; + c++; + if (p == p_end) + break; + /* similar as above, do not even try to overflow */ + if (c == 20) + goto Error; + } + + if (v >= SIZE_MAX) + goto Error; + *s = p; + return v; + +Error: + return SIZE_MAX; +} + +static uint32_t decode_base64url_quad(const char *src) +{ + const char *src_end = src + 4; + uint32_t decoded = 0; + + while (1) { + if ('A' <= *src && *src <= 'Z') { + decoded |= *src - 'A'; + } else if ('a' <= *src && *src <= 'z') { + decoded |= *src - 'a' + 26; + } else if ('0' <= *src && *src <= '9') { + decoded |= *src - '0' + 52; + } else if (*src == '-') { + decoded |= 62; + } else if (*src == '_') { + decoded |= 63; +#if 1 /* curl uses normal base64 */ + } else if (*src == '+') { + decoded |= 62; + } else if (*src == '/') { + decoded |= 63; +#endif + } else { + return UINT32_MAX; + } + if (++src == src_end) + break; + decoded <<= 6; + } + + return decoded; +} + +h2o_iovec_t h2o_decode_base64url(h2o_mem_pool_t *pool, const char *src, size_t len) +{ + h2o_iovec_t decoded; + uint32_t t; + uint8_t *dst; + char remaining_input[4]; + + decoded.len = len * 3 / 4; + decoded.base = pool != NULL ? h2o_mem_alloc_pool(pool, decoded.len + 1) : h2o_mem_alloc(decoded.len + 1); + dst = (uint8_t *)decoded.base; + + while (len >= 4) { + if ((t = decode_base64url_quad(src)) == UINT32_MAX) + goto Error; + *dst++ = t >> 16; + *dst++ = t >> 8; + *dst++ = t; + src += 4; + len -= 4; + } + switch (len) { + case 0: + break; + case 1: + goto Error; + case 2: + remaining_input[0] = *src++; + remaining_input[1] = *src++; + remaining_input[2] = 'A'; + remaining_input[3] = 'A'; + if ((t = decode_base64url_quad(remaining_input)) == UINT32_MAX) + goto Error; + *dst++ = t >> 16; + break; + case 3: + remaining_input[0] = *src++; + remaining_input[1] = *src++; + remaining_input[2] = *src++; + remaining_input[3] = 'A'; + if ((t = decode_base64url_quad(remaining_input)) == UINT32_MAX) + goto Error; + *dst++ = t >> 16; + *dst++ = t >> 8; + break; + } + + assert((char *)dst - decoded.base == decoded.len); + decoded.base[decoded.len] = '\0'; + + return decoded; + +Error: + if (pool == NULL) + free(decoded.base); + return h2o_iovec_init(NULL, 0); +} + +size_t h2o_base64_encode(char *_dst, const void *_src, size_t len, int url_encoded) +{ + static const char *MAP = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + static const char *MAP_URL_ENCODED = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789-_"; + + char *dst = _dst; + const uint8_t *src = _src; + const char *map = url_encoded ? MAP_URL_ENCODED : MAP; + uint32_t quad; + + for (; len >= 3; src += 3, len -= 3) { + quad = ((uint32_t)src[0] << 16) | ((uint32_t)src[1] << 8) | src[2]; + *dst++ = map[quad >> 18]; + *dst++ = map[(quad >> 12) & 63]; + *dst++ = map[(quad >> 6) & 63]; + *dst++ = map[quad & 63]; + } + if (len != 0) { + quad = (uint32_t)src[0] << 16; + *dst++ = map[quad >> 18]; + if (len == 2) { + quad |= (uint32_t)src[1] << 8; + *dst++ = map[(quad >> 12) & 63]; + *dst++ = map[(quad >> 6) & 63]; + if (!url_encoded) + *dst++ = '='; + } else { + *dst++ = map[(quad >> 12) & 63]; + if (!url_encoded) { + *dst++ = '='; + *dst++ = '='; + } + } + } + + *dst = '\0'; + return dst - _dst; +} + +static int decode_hex(int ch) +{ + if ('0' <= ch && ch <= '9') + return ch - '0'; + if ('A' <= ch && ch <= 'F') + return ch - 'A' + 0xa; + if ('a' <= ch && ch <= 'f') + return ch - 'a' + 0xa; + return -1; +} + +int h2o_hex_decode(void *_dst, const char *src, size_t src_len) +{ + unsigned char *dst = _dst; + + if (src_len % 2 != 0) + return -1; + for (; src_len != 0; src_len -= 2) { + int hi, lo; + if ((hi = decode_hex(*src++)) == -1 || (lo = decode_hex(*src++)) == -1) + return -1; + *dst++ = (hi << 4) | lo; + } + return 0; +} + +void h2o_hex_encode(char *dst, const void *_src, size_t src_len) +{ + const unsigned char *src = _src, *src_end = src + src_len; + for (; src != src_end; ++src) { + *dst++ = "0123456789abcdef"[*src >> 4]; + *dst++ = "0123456789abcdef"[*src & 0xf]; + } + *dst = '\0'; +} + +h2o_iovec_t h2o_uri_escape(h2o_mem_pool_t *pool, const char *s, size_t l, const char *preserve_chars) +{ + h2o_iovec_t encoded; + size_t i, capacity = l * 3 + 1; + + encoded.base = pool != NULL ? h2o_mem_alloc_pool(pool, capacity) : h2o_mem_alloc(capacity); + encoded.len = 0; + + /* RFC 3986: + path-noscheme = segment-nz-nc *( "/" segment ) + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + */ + for (i = 0; i != l; ++i) { + int ch = s[i]; + if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ('0' <= ch && ch <= '9') || ch == '-' || ch == '.' || + ch == '_' || ch == '~' || ch == '!' || ch == '$' || ch == '&' || ch == '\'' || ch == '(' || ch == ')' || ch == '*' || + ch == '+' || ch == ',' || ch == ';' || ch == '=' || + (ch != '\0' && preserve_chars != NULL && strchr(preserve_chars, ch) != NULL)) { + encoded.base[encoded.len++] = ch; + } else { + encoded.base[encoded.len++] = '%'; + encoded.base[encoded.len++] = "0123456789ABCDEF"[(ch >> 4) & 0xf]; + encoded.base[encoded.len++] = "0123456789ABCDEF"[ch & 0xf]; + } + } + encoded.base[encoded.len] = '\0'; + + return encoded; +} + +h2o_iovec_t h2o_get_filext(const char *path, size_t len) +{ + const char *end = path + len, *p = end; + + while (--p != path) { + if (*p == '.') { + return h2o_iovec_init(p + 1, end - (p + 1)); + } else if (*p == '/') { + break; + } + } + return h2o_iovec_init(NULL, 0); +} + +static int is_ws(int ch) +{ + return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; +} + +h2o_iovec_t h2o_str_stripws(const char *s, size_t len) +{ + const char *end = s + len; + + while (s != end) { + if (!is_ws(*s)) + break; + ++s; + } + while (s != end) { + if (!is_ws(end[-1])) + break; + --end; + } + return h2o_iovec_init(s, end - s); +} + +size_t h2o_strstr(const char *haysack, size_t haysack_len, const char *needle, size_t needle_len) +{ + /* TODO optimize */ + if (haysack_len >= needle_len) { + size_t off, max = haysack_len - needle_len + 1; + if (needle_len == 0) + return 0; + for (off = 0; off != max; ++off) + if (haysack[off] == needle[0] && memcmp(haysack + off + 1, needle + 1, needle_len - 1) == 0) + return off; + } + return SIZE_MAX; +} + +/* note: returns a zero-width match as well */ +const char *h2o_next_token(h2o_iovec_t *iter, int separator, size_t *element_len, h2o_iovec_t *value) +{ + const char *cur = iter->base, *end = iter->base + iter->len, *token_start, *token_end; + + /* find start */ + for (;; ++cur) { + if (cur == end) + return NULL; + if (!(*cur == ' ' || *cur == '\t')) + break; + } + token_start = cur; + token_end = cur; + + /* find last */ + for (;; ++cur) { + if (cur == end) + break; + if (*cur == separator) { + ++cur; + break; + } + if (*cur == ',') { + if (token_start == cur) { + ++cur; + token_end = cur; + } + break; + } + if (value != NULL && *cur == '=') { + ++cur; + goto FindValue; + } + if (!(*cur == ' ' || *cur == '\t')) + token_end = cur + 1; + } + + /* found */ + *iter = h2o_iovec_init(cur, end - cur); + *element_len = token_end - token_start; + if (value != NULL) + *value = (h2o_iovec_t){NULL}; + return token_start; + +FindValue: + *iter = h2o_iovec_init(cur, end - cur); + *element_len = token_end - token_start; + if ((value->base = (char *)h2o_next_token(iter, separator, &value->len, NULL)) == NULL) { + *value = (h2o_iovec_t){"", 0}; + } else if (h2o_memis(value->base, value->len, H2O_STRLIT(","))) { + *value = (h2o_iovec_t){"", 0}; + iter->base -= 1; + iter->len += 1; + } + return token_start; +} + +int h2o_contains_token(const char *haysack, size_t haysack_len, const char *needle, size_t needle_len, int separator) +{ + h2o_iovec_t iter = h2o_iovec_init(haysack, haysack_len); + const char *token = NULL; + size_t token_len = 0; + + while ((token = h2o_next_token(&iter, separator, &token_len, NULL)) != NULL) { + if (h2o_lcstris(token, token_len, needle, needle_len)) { + return 1; + } + } + return 0; +} + +h2o_iovec_t h2o_htmlescape(h2o_mem_pool_t *pool, const char *src, size_t len) +{ + const char *s, *end = src + len; + size_t add_size = 0; + +#define ENTITY_MAP() \ + ENTITY('"', """); \ + ENTITY('&', "&"); \ + ENTITY('\'', "'"); \ + ENTITY('<', "<"); \ + ENTITY('>', ">"); + + for (s = src; s != end; ++s) { + if ((unsigned)(unsigned char)*s - '"' <= '>' - '"') { + switch (*s) { +#define ENTITY(code, quoted) \ + case code: \ + add_size += sizeof(quoted) - 2; \ + break + ENTITY_MAP() +#undef ENTITY + } + } + } + + /* escape and return the result if necessary */ + if (add_size != 0) { + /* allocate buffer and fill in the chars that are known not to require escaping */ + h2o_iovec_t escaped = {h2o_mem_alloc_pool(pool, len + add_size + 1), 0}; + /* fill-in the rest */ + for (s = src; s != end; ++s) { + switch (*s) { +#define ENTITY(code, quoted) \ + case code: \ + memcpy(escaped.base + escaped.len, quoted, sizeof(quoted) - 1); \ + escaped.len += sizeof(quoted) - 1; \ + break + ENTITY_MAP() +#undef ENTITY + default: + escaped.base[escaped.len++] = *s; + break; + } + } + assert(escaped.len == len + add_size); + escaped.base[escaped.len] = '\0'; + + return escaped; + } + +#undef ENTITY_MAP + + /* no need not escape; return the original */ + return h2o_iovec_init(src, len); +} + +h2o_iovec_t h2o_concat_list(h2o_mem_pool_t *pool, h2o_iovec_t *list, size_t count) +{ + h2o_iovec_t ret = {NULL, 0}; + size_t i; + + /* calc the length */ + for (i = 0; i != count; ++i) { + ret.len += list[i].len; + } + + /* allocate memory */ + if (pool != NULL) + ret.base = h2o_mem_alloc_pool(pool, ret.len + 1); + else + ret.base = h2o_mem_alloc(ret.len + 1); + + /* concatenate */ + ret.len = 0; + for (i = 0; i != count; ++i) { + h2o_memcpy(ret.base + ret.len, list[i].base, list[i].len); + ret.len += list[i].len; + } + ret.base[ret.len] = '\0'; + + return ret; +} + +int h2o_str_at_position(char *buf, const char *src, size_t src_len, int lineno, int column) +{ + const char *src_end = src + src_len; + int i; + + /* find the line */ + if (lineno <= 0 || column <= 0) + return -1; + for (--lineno; lineno != 0; --lineno) { + do { + if (src == src_end) + return -1; + } while (*src++ != '\n'); + } + + /* adjust the starting column */ + while (column > 40) { + if (src != src_end) + ++src; + --column; + } + + /* emit */ + for (i = 1; i <= 76; ++i) { + if (src == src_end || *src == '\n') + break; + *buf++ = *src++; + } + if (i < column) + column = i; + *buf++ = '\n'; + for (i = 1; i < column; ++i) + *buf++ = ' '; + *buf++ = '^'; + *buf++ = '\n'; + *buf = '\0'; + return 0; +} -- cgit v1.2.3