diff options
Diffstat (limited to 'plug-ins/help-browser/uri.c')
-rw-r--r-- | plug-ins/help-browser/uri.c | 386 |
1 files changed, 386 insertions, 0 deletions
diff --git a/plug-ins/help-browser/uri.c b/plug-ins/help-browser/uri.c new file mode 100644 index 0000000..c0493fa --- /dev/null +++ b/plug-ins/help-browser/uri.c @@ -0,0 +1,386 @@ +/* GIMP - The GNU Image Manipulation Program + * Copyright (C) 1995 Spencer Kimball and Peter Mattis + * + * The GIMP Help Browser - URI functions + * Copyright (C) 2001 Jacob Schroeder <jacob@convergence.de> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "config.h" + +#include <string.h> + +#include <glib.h> + +#include "uri.h" + +/* #define URI_DEBUG 1 */ + +typedef enum +{ + URI_UNKNOWN, + URI_ABSURI, + URI_NETPATH, + URI_ABSPATH, + URI_RELPATH, + URI_QUERY, + URI_EMPTY, + URI_FRAGMENT, + URI_INVALID +} UriType; + + +static UriType +uri_get_type (const gchar *uri) +{ + gchar c; + const gchar *cptr; + UriType type = URI_UNKNOWN; + + if (!uri) + return type; + + cptr = uri; + c = *cptr++; + + if (g_ascii_isalpha (c)) + { + type = URI_RELPATH; /* assume relative path */ + + while ((c = *cptr++)) + { + if (g_ascii_isalnum (c) || c == '+' || c == '-' || c == '.') + continue; + + if (c == ':') + { + /* it was a scheme */ + type = URI_ABSURI; + } + break; + } + } + else + { + switch (c) + { + case '/': + if (*cptr == '/') + { + cptr++; + type = URI_NETPATH; + } + else + { + type = URI_ABSPATH; + } + break; + case '?': + type = URI_QUERY; + break; + case '#': + type = URI_FRAGMENT; + break; + case '\0': + type = URI_EMPTY; + break; + default: + type = URI_RELPATH; + break; + } + } + +#ifdef URI_DEBUG + g_print ("uri_get_type (\"%s\") -> ", uri); + switch (type) + { + case URI_UNKNOWN: g_print ("unknown"); break; + case URI_ABSURI: g_print ("absuri"); break; + case URI_NETPATH: g_print ("netpath"); break; + case URI_ABSPATH: g_print ("abspath"); break; + case URI_RELPATH: g_print ("relpath"); break; + case URI_QUERY: g_print ("query"); break; + case URI_EMPTY: g_print ("empty"); break; + case URI_FRAGMENT: g_print ("fragment"); break; + case URI_INVALID: g_print ("invalid"); break; + } + g_print ("\n"); +#endif + + return type; +} + +gchar * +uri_to_abs (const gchar *uri, + const gchar *base_uri) +{ + gchar c; + const gchar *cptr; + gchar *retval = NULL; + UriType uri_type = URI_UNKNOWN; + UriType base_type = URI_UNKNOWN; + + gint base_cnt = 0; /* no of chars to be copied from base URI */ + gint uri_cnt = 0; /* no of chars to be copied from URI */ + gint sep_cnt = 0; /* no of chars to be inserted between them */ + + const gchar *sep_str = ""; /* string to insert between base and uri */ + const gchar *part; + const gchar *last_segment = NULL; + +#ifdef URI_DEBUG + g_print ("uri_to_abs (\"%s\", \"%s\")\n", uri, base_uri); +#endif + + /* this function does not use the algorithm that is being proposed + * in RFC 2396. Instead it analyses the first characters of each + * URI to determine its kind (abs, net, path, ...). + * After that it locates the missing parts in the base URI and then + * concats everything into a newly allocated string. + */ + + /* determine the kind of the URIs */ + uri_type = uri_get_type (uri); + + if (uri_type != URI_ABSURI) + { + base_type = uri_get_type (base_uri); + + if (base_type != URI_ABSURI) + return NULL; /* neither uri nor base uri are absolute */ + } + + /* find missing parts in base URI */ + switch (uri_type) + { + case URI_ABSURI: + /* base uri not needed */ + break; + + case URI_QUERY: + /* ??? last segment? */ + uri_type = URI_RELPATH; + case URI_NETPATH: /* base scheme */ + case URI_ABSPATH: /* base scheme and authority */ + case URI_RELPATH: /* base scheme, authority and path */ + cptr = base_uri; + + /* skip scheme */ + while ((c = *cptr++) && c != ':') + ; /* nada */ + + base_cnt = cptr - base_uri; /* incl : */ + + if (*cptr != '/') + { + /* completion not possible */ + return NULL; + } + + if (uri_type == URI_NETPATH) + break; + + /* skip authority */ + if (cptr[0] == '/' && cptr[1] == '/') + { + part = cptr; + cptr += 2; + + while ((c = *cptr++) && c != '/' && c != '?' && c != '#') + ; /* nada */ + + cptr--; + base_cnt += cptr - part; + } + + if (uri_type == URI_ABSPATH) + break; + + /* skip path */ + if (*cptr != '/') + { + sep_cnt = 1; + sep_str = "/"; + break; + } + + part = cptr; + + g_assert (*cptr == '/'); + + while ((c = *cptr++) && c != '?' && c != '#') + { + if (c == '/') + last_segment = cptr - 1; + }; + + g_assert (last_segment); + + cptr = last_segment; + + while ((c = *uri) && c == '.' && cptr > part) + { + gint shift_segment = 0; + + c = uri[1]; + + if (c == '.' ) + { + c = uri[2]; + shift_segment = 1; + } + + if (c == '/') + { + uri += 2; + } + else if (c == 0 || c == '?' || c == '#') + { + uri += 1; + } + else + { + break; + } + + g_assert (*cptr == '/'); + + if (shift_segment) + { + uri += 1; + while (cptr > part && *--cptr != '/') + ; /* nada */ + } + } + + base_cnt += cptr - part + 1; + break; + + case URI_EMPTY: + case URI_FRAGMENT: + /* use whole base uri */ + base_cnt = strlen (base_uri); + break; + + case URI_UNKNOWN: + case URI_INVALID: + return NULL; + } + + /* do not include fragment part from the URI reference */ + for (cptr = uri; (c = *cptr) && c != '#'; cptr++) + ; /* nada */ + + uri_cnt = cptr - uri; + + /* allocate string and copy characters */ + + retval = g_new (gchar, base_cnt + sep_cnt + uri_cnt + 1); + + if (base_cnt) + strncpy (retval, base_uri, base_cnt); + + if (sep_cnt) + strncpy (retval + base_cnt, sep_str, sep_cnt); + + if (uri_cnt) + strncpy (retval + base_cnt + sep_cnt, uri, uri_cnt); + + retval[base_cnt + sep_cnt + uri_cnt] = '\0'; + +#ifdef URI_DEBUG + g_print (" -> \"%s\"\n", retval); +#endif + + return retval; +} + +#if 0 +RFC 2396 URI Generic Syntax August 1998 + + +A. Collected BNF for URI + + URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + absoluteURI = scheme ":" ( hier_part | opaque_part ) + relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + + hier_part = ( net_path | abs_path ) [ "?" query ] + opaque_part = uric_no_slash *uric + + uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | + "&" | "=" | "+" | "$" | "," + + net_path = "//" authority [ abs_path ] + abs_path = "/" path_segments + rel_path = rel_segment [ abs_path ] + + rel_segment = 1*( unreserved | escaped | + ";" | "@" | "&" | "=" | "+" | "$" | "," ) + + scheme = alpha *( alpha | digit | "+" | "-" | "." ) + + authority = server | reg_name + + reg_name = 1*( unreserved | escaped | "$" | "," | + ";" | ":" | "@" | "&" | "=" | "+" ) + + server = [ [ userinfo "@" ] hostport ] + userinfo = *( unreserved | escaped | + ";" | ":" | "&" | "=" | "+" | "$" | "," ) + + hostport = host [ ":" port ] + host = hostname | IPv4address + hostname = *( domainlabel "." ) toplabel [ "." ] + domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum + toplabel = alpha | alpha *( alphanum | "-" ) alphanum + IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit + port = *digit + + path = [ abs_path | opaque_part ] + path_segments = segment *( "/" segment ) + segment = *pchar *( ";" param ) + param = *pchar + pchar = unreserved | escaped | + ":" | "@" | "&" | "=" | "+" | "$" | "," + + query = *uric + + fragment = *uric + + uric = reserved | unreserved | escaped + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + unreserved = alphanum | mark + mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | + "(" | ")" + + escaped = "%" hex hex + hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | + "a" | "b" | "c" | "d" | "e" | "f" + + alphanum = alpha | digit + alpha = lowalpha | upalpha + + lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | + "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | + "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" + upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | + "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | + "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" + digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | + "8" | "9" + +#endif |