summaryrefslogtreecommitdiffstats
path: root/lib/util/rfc1738.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/util/rfc1738.c')
-rw-r--r--lib/util/rfc1738.c198
1 files changed, 198 insertions, 0 deletions
diff --git a/lib/util/rfc1738.c b/lib/util/rfc1738.c
new file mode 100644
index 0000000..7b8db11
--- /dev/null
+++ b/lib/util/rfc1738.c
@@ -0,0 +1,198 @@
+/*
+ * Functions for RFC 3986 percent-encoding.
+ *
+ * NOTE:
+ *
+ * This file was originally imported from the Squid project but has been
+ * significantly altered. The licence below is reproduced intact, but refers
+ * to files in Squid's repository, not in Samba. See COPYING for the GPLv3
+ * notice (being the later version mentioned below).
+ */
+
+/*
+ * $Id$
+ *
+ * DEBUG:
+ * AUTHOR: Harvest Derived
+ *
+ * SQUID Web Proxy Cache http://www.squid-cache.org/
+ * ----------------------------------------------------------
+ *
+ * Squid is the result of efforts by numerous individuals from
+ * the Internet community; see the CONTRIBUTORS file for full
+ * details. Many organizations have provided support for Squid's
+ * development; see the SPONSORS file for full details. Squid is
+ * Copyrighted (C) 2001 by the Regents of the University of
+ * California; see the COPYRIGHT file for full details. Squid
+ * incorporates software developed and/or copyrighted by other
+ * sources; see the CREDITS file for full details.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "replace.h"
+#include <talloc.h>
+#include "lib/util/samba_util.h"
+
+#define RFC1738_ENCODE 1 /* escapees[] flag bit: "unsafe" byte, escaped in canonical form */
+#define RFC1738_RESERVED 2 /* escapees[] flag bit: RFC 3986 reserved byte */
+
+/*
+ * According to RFC 1738, "$-_.+!*'()," are not reserved or unsafe, but as
+ * that has been obsolete since 2004, we aim instead for RFC 3986, where:
+ *
+ * reserved = : / ? # [ ] @ ! $ & ' ( ) * + , ; =
+ * unreserved = ALPHA DIGIT - . _ ~
+ *
+ * and whatever is not in either of those are what RFC 1738 called "unsafe",
+ * meaning that they are canonically but not mandatorily escaped.
+ *
+ * Characters below 0x20 or above 0x7E are always encoded.
+ */
+
+static const unsigned char escapees[127] = { /* indexed by byte value 0..126; unlisted entries are 0 */
+ [' '] = RFC1738_ENCODE, /* "unsafe" bytes: neither reserved nor unreserved */
+ ['"'] = RFC1738_ENCODE,
+ ['%'] = RFC1738_ENCODE, /* the escape introducer itself */
+ ['<'] = RFC1738_ENCODE,
+ ['>'] = RFC1738_ENCODE,
+ ['\\'] = RFC1738_ENCODE,
+ ['^'] = RFC1738_ENCODE,
+ ['`'] = RFC1738_ENCODE,
+ ['{'] = RFC1738_ENCODE,
+ ['|'] = RFC1738_ENCODE,
+ ['}'] = RFC1738_ENCODE,
+ /* reserved : / ? # [ ] @ ! $ & ' ( ) * + , ; = */
+ [':'] = RFC1738_RESERVED,
+ ['/'] = RFC1738_RESERVED,
+ ['?'] = RFC1738_RESERVED,
+ ['#'] = RFC1738_RESERVED,
+ ['['] = RFC1738_RESERVED,
+ [']'] = RFC1738_RESERVED,
+ ['@'] = RFC1738_RESERVED,
+ ['!'] = RFC1738_RESERVED,
+ ['$'] = RFC1738_RESERVED,
+ ['&'] = RFC1738_RESERVED,
+ ['\''] = RFC1738_RESERVED,
+ ['('] = RFC1738_RESERVED,
+ [')'] = RFC1738_RESERVED,
+ ['*'] = RFC1738_RESERVED,
+ ['+'] = RFC1738_RESERVED,
+ [','] = RFC1738_RESERVED,
+ [';'] = RFC1738_RESERVED,
+ ['='] = RFC1738_RESERVED,
+};
+
+/*
+ * rfc1738_do_escape - writes the %XX-escaped form of the first len bytes of
+ * url into buf (bufsize bytes) and NUL-terminates it. Returns buf, or NULL if
+ * buf is too small. Bytes outside 0x20..0x7E are always escaped; other bytes
+ * only if their escapees[] flags intersect mask: RFC1738_ENCODE |
+ * RFC1738_RESERVED for canonical escaping, RFC1738_RESERVED for mandatory.
+ */
+static char *
+rfc1738_do_escape(char *buf, size_t bufsize,
+ const char *url, size_t len, unsigned char mask)
+{
+ size_t i; /* read index into url */
+ size_t j = 0; /* write index into buf */
+ for (i = 0; i < len; i++) {
+ unsigned int c = (unsigned char) url[i]; /* never negative, whatever char's signedness */
+ if (c > 126 || c < 32 || (escapees[c] & mask)) { /* range tests first keep escapees[c] in bounds */
+ if (j + 3 >= bufsize) { /* need 3 bytes for "%XX" plus a terminating NUL */
+ return NULL;
+ }
+ (void) snprintf(&buf[j], 4, "%%%02X", c);
+ j += 3; /* buf[j] now holds snprintf's NUL; the next write replaces it */
+ } else {
+ if (j + 1 >= bufsize) { /* need 1 byte plus a terminating NUL */
+ return NULL;
+ }
+ buf[j] = c;
+ j++;
+ }
+ }
+ buf[j] = '\0'; /* NOTE(review): assumes bufsize >= 1, which the only visible caller guarantees */
+ return buf;
+}
+
+/*
+ * rfc1738_escape_part - Returns a buffer talloced on mem_ctx holding the RFC
+ * 3986 canonically escaped url segment, or NULL if it cannot be allocated.
+ */
+char *
+rfc1738_escape_part(TALLOC_CTX *mem_ctx, const char *url)
+{
+ size_t bufsize = 0;
+ char *buf = NULL;
+
+ size_t len = strlen(url); /* url must be a non-NULL, NUL-terminated string */
+ if (len >= SIZE_MAX / 3) { /* len * 3 + 1 below must not wrap around */
+ return NULL;
+ }
+
+ bufsize = len * 3 + 1; /* worst case: every byte becomes "%XX", plus the NUL */
+ buf = talloc_array(mem_ctx, char, bufsize);
+ if (buf == NULL) {
+ return NULL;
+ }
+
+ talloc_set_name_const(buf, buf); /* the talloc name is the buffer itself, i.e. the escaped string */
+
+ return rfc1738_do_escape(buf, bufsize, url, len,
+ RFC1738_ENCODE | RFC1738_RESERVED); /* NOTE(review): worst-case sizing means this cannot return NULL */
+}
+
+/*
+ * rfc1738_unescape() - Converts url-escaped characters in the string.
+ *
+ * The two characters following a '%' in a string should be hex digits that
+ * describe an encoded byte. For example, "%25" is hex 0x25 or '%' in ASCII;
+ * this is the only way to include a % in the unescaped string. Any character
+ * can be escaped, including plain letters (e.g. "%61" for "a"). Anything
+ * other than 2 hex characters following the % is an error.
+ *
+ * The conversion is done in-place, which is always safe as unescapes can only
+ * shorten the string.
+ *
+ * Returns a pointer to the end of the string (that is, the '\0' byte), or
+ * NULL on error, at which point s is in an undefined state.
+ *
+ * Note that after `char *e = rfc1738_unescape(s)`, `strlen(s)` will not equal
+ * `e - s` if s originally contained "%00". You might want to check for this.
+ */
+
+_PUBLIC_ char *rfc1738_unescape(char *s)
+{
+ size_t i, j; /* i is write, j is read */
+ for (i = 0, j = 0; s[j] != '\0'; i++, j++) {
+ if (s[j] == '%') {
+ uint8_t v;
+ bool ok;
+
+ ok = hex_byte(&s[j+1], &v); /* presumably decodes exactly two hex digits into v; helper is defined elsewhere */
+ if (!ok) {
+ return NULL; /* bytes before index i have already been rewritten */
+ }
+ j += 2; /* OK; hex_byte() has checked ahead */
+ s[i] = (unsigned char)v;
+ } else {
+ s[i] = s[j]; /* i <= j always, so this never clobbers unread input */
+ }
+ }
+ s[i] = '\0';
+ return s + i; /* address of the new terminator */
+}