summaryrefslogtreecommitdiffstats
path: root/libnetdata/url/url.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-07-24 09:54:23 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-07-24 09:54:44 +0000
commit836b47cb7e99a977c5a23b059ca1d0b5065d310e (patch)
tree1604da8f482d02effa033c94a84be42bc0c848c3 /libnetdata/url/url.c
parentReleasing debian version 1.44.3-2. (diff)
downloadnetdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.tar.xz
netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.zip
Merging upstream version 1.46.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'libnetdata/url/url.c')
-rw-r--r--libnetdata/url/url.c299
1 files changed, 0 insertions, 299 deletions
diff --git a/libnetdata/url/url.c b/libnetdata/url/url.c
deleted file mode 100644
index 39366cbe8..000000000
--- a/libnetdata/url/url.c
+++ /dev/null
@@ -1,299 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-#include "../libnetdata.h"
-
-// ----------------------------------------------------------------------------
-// URL encode / decode
-// code from: http://www.geekhideout.com/urlcode.shtml
-
/*
 * Converts a hex character to its integer value.
 *
 * '0'-'9' map to 0-9 and 'a'-'f' / 'A'-'F' map to 10-15.
 * The input is NOT validated: any other character yields a meaningless
 * value, so callers must check the character with isxdigit() first.
 */
char from_hex(char ch) {
    // <ctype.h> functions are only defined for unsigned char values (or EOF);
    // passing a negative plain char is undefined behavior
    const unsigned char c = (unsigned char)ch;

    if (isdigit(c))
        return (char)(c - '0');

    return (char)(tolower(c) - 'a' + 10);
}
-
/*
 * Converts an integer value to its lowercase hex character.
 * Only the low 4 bits of code are used.
 */
char to_hex(char code) {
    const int nibble = code & 15;
    return "0123456789abcdef"[nibble];
}
-
/*
 * Returns an url-encoded version of str.
 *
 * Alphanumerics and '-', '_', '.', '~' are copied verbatim, a space becomes
 * '+', and every other byte becomes "%xx" (lowercase hex).
 *
 * IMPORTANT: the result is a newly allocated string; be sure to free it
 * after use.
 */
char *url_encode(char *str) {
    // worst case: every input byte expands to the 3 characters "%xx"
    char *buf = mallocz(strlen(str) * 3 + 1);
    char *pbuf = buf;

    for (; *str; str++) {
        // work on the unsigned byte value: isalnum() is undefined for
        // negative arguments, which plain char produces for bytes >= 0x80
        const unsigned char c = (unsigned char)*str;

        if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~')
            *pbuf++ = (char)c;

        else if (c == ' ')
            *pbuf++ = '+';

        else {
            *pbuf++ = '%';
            *pbuf++ = to_hex((char)(c >> 4));
            *pbuf++ = to_hex((char)(c & 15));
        }
    }
    *pbuf = '\0';

    // return a right-sized copy and release the worst-case scratch buffer
    char *encoded = strdupz(buf);
    freez(buf);
    return encoded;
}
-
/**
 * Percentage escape decode
 *
 * Decode a %XX escape, or return 0 if it cannot be decoded.
 * s[0] itself is not examined; only s[1] and s[2] are read, and s[2] is
 * read only when s[1] is a hex digit (so a truncated escape is never
 * read past its terminator).
 *
 * @param s the string to decode, pointing at the '%' of the escape
 *
 * @return The character decoded on success and 0 otherwise
 *         (note that a valid "%00" also decodes to 0).
 */
char url_percent_escape_decode(const char *s) {
    // isxdigit() is false for '\0', so this rejects both truncated escapes
    // and non-hex characters such as "%zz"
    if(likely(isxdigit((unsigned char)s[1]) && isxdigit((unsigned char)s[2])))
        return (char)(from_hex(s[1]) << 4 | from_hex(s[2]));

    return 0;
}
-
/**
 * Get byte length
 *
 * This (utf8 string related) should be moved in separate file in future
 *
 * The length of a multi-byte sequence is the number of leading 1 bits in
 * its first byte.
 *
 * @param c is the first byte of the utf8 character
 *
 * @return 1 when IS_UTF8_BYTE(c) is false, 2-4 for a multi-byte start byte,
 *         and -1 for anything else. NOTE: the return type is plain char, so
 *         where char is unsigned the -1 reads back as 255; callers should
 *         store the result in a signed type before testing it.
 */
char url_utf8_get_byte_length(char c) {
    if(!IS_UTF8_BYTE(c))
        return 1;

    // count on an unsigned copy: left-shifting a negative (signed char)
    // value is undefined behavior
    unsigned char bits = (unsigned char)c;
    char length = 0;
    while(likely(bits & 0x80)) {
        length++;
        bits = (unsigned char)(bits << 1);
    }

    //4 byte is max size for UTF-8 char
    //10XX XXXX is not valid character -> check length == 1
    if(length > 4 || length == 1)
        return -1;

    return length;
}
-
/**
 * Decode Multibyte UTF8
 *
 * Decode % encoded UTF-8 characters and copy them to *d.
 * Every byte of the sequence must be written as its own %XX escape.
 *
 * @param s first address, pointing at the '%' of the first escaped byte
 * @param d destination to write the decoded bytes to
 * @param d_end last address of the destination; the sequence is rejected
 *              unless it ends strictly before d_end
 *
 * @return count of bytes written to *d, or 0 on any error. On error some
 *         bytes of the sequence may already have been stored at d.
 */
char url_decode_multibyte_utf8(const char *s, char *d, const char *d_end) {
    char first_byte = url_percent_escape_decode(s);

    if(unlikely(!first_byte || !IS_UTF8_STARTBYTE(first_byte)))
        return 0;

    // keep the length in a signed type, so that the -1 error value is still
    // negative on platforms where plain char is unsigned
    int byte_length = (signed char)url_utf8_get_byte_length(first_byte);

    // compare distances instead of forming d + byte_length, which could
    // point outside the destination buffer
    if(unlikely(byte_length <= 0 || byte_length >= d_end - d))
        return 0;

    for(int to_read = byte_length; to_read > 0; to_read--) {
        // without this check a sequence cut short (e.g. "%C3" at the end of
        // the input) would be decoded from bytes beyond the terminator
        if(unlikely(*s != '%'))
            return 0;

        char c = url_percent_escape_decode(s);

        if(unlikely( !IS_UTF8_BYTE(c) ))
            return 0;

        // only the first byte of the sequence may be a start byte
        if((to_read != byte_length) && IS_UTF8_STARTBYTE(c))
            return 0;

        *d++ = c;
        s += 3;
    }

    return (char)byte_length;
}
-
/*
 * utf8_check() walks the '\0'-terminated string at s and validates it as
 * UTF-8.
 *
 * Returns NULL when the whole string is well-formed, otherwise a pointer
 * to the first byte of the first offending sequence. Besides malformed
 * sequences it rejects overlong encodings, surrogates (U+D800..U+DFFF),
 * the non-characters U+FFFE and U+FFFF, and code points above U+10FFFF.
 * Input in any other encoding is very likely to trip one of these checks,
 * which makes the function a good heuristic for telling UTF-8 apart from
 * other encodings.
 *
 * Continuation bytes are inspected strictly left to right and the scan
 * stops at the first bad one, so the terminator is never read past.
 *
 * Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2005-03-30
 * License: http://www.cl.cam.ac.uk/~mgk25/short-license.html
 */
unsigned char *utf8_check(unsigned char *s)
{
    for (;;) {
        const unsigned char lead = s[0];

        if (lead == 0)
            return NULL;

        /* 0xxxxxxx */
        if (lead < 0x80) {
            s += 1;
            continue;
        }

        /* 110XXXXx 10xxxxxx */
        if ((lead & 0xe0) == 0xc0) {
            if ((s[1] & 0xc0) != 0x80)
                return s;
            if ((lead & 0xfe) == 0xc0) /* overlong? */
                return s;

            s += 2;
            continue;
        }

        /* 1110XXXX 10Xxxxxx 10xxxxxx */
        if ((lead & 0xf0) == 0xe0) {
            if ((s[1] & 0xc0) != 0x80)
                return s;
            if ((s[2] & 0xc0) != 0x80)
                return s;
            if (lead == 0xe0 && (s[1] & 0xe0) == 0x80) /* overlong? */
                return s;
            if (lead == 0xed && (s[1] & 0xe0) == 0xa0) /* surrogate? */
                return s;
            if (lead == 0xef && s[1] == 0xbf && (s[2] & 0xfe) == 0xbe) /* U+FFFE or U+FFFF? */
                return s;

            s += 3;
            continue;
        }

        /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
        if ((lead & 0xf8) == 0xf0) {
            if ((s[1] & 0xc0) != 0x80)
                return s;
            if ((s[2] & 0xc0) != 0x80)
                return s;
            if ((s[3] & 0xc0) != 0x80)
                return s;
            if (lead == 0xf0 && (s[1] & 0xf0) == 0x80) /* overlong? */
                return s;
            if (lead == 0xf4 && s[1] > 0x8f) /* > U+10FFFF? */
                return s;
            if (lead > 0xf4) /* > U+10FFFF? */
                return s;

            s += 4;
            continue;
        }

        /* stray continuation byte or invalid lead byte */
        return s;
    }
}
-
/**
 * URL decode (re-entrant)
 *
 * Decode url into the caller supplied buffer: '+' becomes a space, %XX
 * escapes are decoded, everything else is copied verbatim. Decoding stops
 * at the end of url or when the buffer is full, whichever comes first.
 *
 * A single-byte escape is accepted only when it decodes to a printable
 * character; escapes with the high bit set are handed to
 * url_decode_multibyte_utf8(). The decoded result must pass utf8_check().
 *
 * @param to   the destination buffer
 * @param url  the NUL-terminated string to decode
 * @param size the size of the destination buffer, including the terminator
 *
 * @return to on success, NULL on failure. On failure with valid arguments,
 *         to still holds a NUL-terminated string (the partial output).
 */
char *url_decode_r(char *to, const char *url, size_t size) {
    // with size == 0 there is no room even for the terminator, and
    // &to[size - 1] would point before the buffer
    if(unlikely(!to || !url || !size))
        return NULL;

    const char *s = url;            // source
    char *d = to,                   // destination
         *e = &to[size - 1];        // destination end (reserved for '\0')

    while(*s && d < e) {
        if(unlikely(*s == '%')) {
            char t = url_percent_escape_decode(s);
            if(IS_UTF8_BYTE(t)) {
                char bytes_written = url_decode_multibyte_utf8(s, d, e);
                if(likely(bytes_written)){
                    d += bytes_written;
                    // each byte consumed 3 source characters; the last one
                    // is skipped by the s++ at the bottom of the loop
                    s += (bytes_written * 3)-1;
                }
                else {
                    goto fail_cleanup;
                }
            }
            else if(likely(t) && isprint((unsigned char)t)) {
                // avoid HTTP header injection
                *d++ = t;
                s += 2;
            }
            else
                goto fail_cleanup;
        }
        else if(unlikely(*s == '+'))
            *d++ = ' ';

        else
            *d++ = *s;

        s++;
    }

    *d = '\0';

    if(unlikely( utf8_check((unsigned char *)to) )) //NULL means success here
        return NULL;

    return to;

fail_cleanup:
    *d = '\0';
    return NULL;
}
-
/**
 * Is request complete
 *
 * Decide whether the buffered HTTP request has been fully received.
 *
 * - GET (and any method other than POST/PUT): complete when the last four
 *   bytes before end are "\r\n\r\n".
 * - POST / PUT: complete when a "Content-Length: " header is present, the
 *   blank line ending the headers has been seen, and the number of bytes
 *   after it equals the announced length. The header name is matched
 *   case-sensitively, with exactly one space after the colon.
 *
 * NOTE(review): the code uses strncmp()/strstr() on the buffer, so it
 * presumably expects begin to be NUL-terminated with length == end - begin;
 * confirm against the caller.
 *
 * @param begin             start of the received request
 * @param end               end of the received data
 * @param length            number of bytes received
 * @param post_payload      when not NULL (together with post_payload_size),
 *                          receives a newly allocated, NUL-terminated copy of
 *                          the payload; a previous non-NULL value is freed
 * @param post_payload_size when not NULL, receives the payload length
 *
 * @return true when the request is complete, false otherwise.
 */
inline bool url_is_request_complete(char *begin, char *end, size_t length, char **post_payload, size_t *post_payload_size) {
    if (begin == end || length < 4)
        return false;

    if(likely(strncmp(begin, "GET ", 4) == 0)) {
        return strstr(end - 4, "\r\n\r\n") != NULL;
    }
    else if(unlikely(strncmp(begin, "POST ", 5) == 0 || strncmp(begin, "PUT ", 4) == 0)) {
        char *cl = strstr(begin, "Content-Length: ");
        if(!cl) return false;
        cl = &cl[16]; // skip over "Content-Length: "

        size_t content_length = str2ul(cl);

        // the payload starts right after the blank line that ends the headers
        char *payload = strstr(cl, "\r\n\r\n");
        if(!payload) return false;
        payload += 4;

        size_t payload_length = length - (payload - begin);

        if(payload_length == content_length) {
            if(post_payload && post_payload_size) {
                if (*post_payload)
                    freez(*post_payload);

                *post_payload = mallocz(payload_length + 1);
                memcpy(*post_payload, payload, payload_length);
                (*post_payload)[payload_length] = '\0';

                *post_payload_size = payload_length;
            }
            return true;
        }

        return false;
    }
    else {
        return strstr(end - 4, "\r\n\r\n") != NULL;
    }
}
-
/**
 * Find protocol
 *
 * Search for the string ' HTTP/' in the message given.
 *
 * @param s is the start of the user request (NUL-terminated).
 *
 * @return a pointer to the space that begins the first " HTTP/" in s, or a
 *         pointer to the terminating '\0' of s when it does not occur.
 */
char *url_find_protocol(char *s) {
    char *protocol = strstr(s, " HTTP/");
    if(protocol)
        return protocol;

    // not found: return the end of the string, never a position beyond it
    return s + strlen(s);
}