summaryrefslogtreecommitdiffstats
path: root/nselib/url.lua
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--nselib/url.lua543
1 files changed, 543 insertions, 0 deletions
diff --git a/nselib/url.lua b/nselib/url.lua
new file mode 100644
index 0000000..3234604
--- /dev/null
+++ b/nselib/url.lua
@@ -0,0 +1,543 @@
+---
+-- URI parsing, composition, and relative URL resolution.
+--
+-- A URL is represented as a table with the following entries:
+-- * <code>scheme</code>
+-- * <code>fragment</code>
+-- * <code>query</code>
+-- * <code>params</code>
+-- * <code>authority</code>
+-- * <code>userinfo</code>
+-- * <code>path</code>
+-- * <code>port</code>
+-- * <code>password</code>
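+-- These correspond to these parts of a URL (some may be <code>nil</code>):
+-- <code>
+-- scheme://userinfo@host:port/path;params?query#fragment
+-- </code>
+-- Here <code>authority</code> is the whole <code>userinfo@host:port</code>
+-- part, and <code>userinfo</code> has the form <code>user[:password]</code>.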
+--
+-- @author Diego Nehab
+-- @author Eddie Bell <ejlbell@gmail.com>
+
+--[[
+URI parsing, composition and relative URL resolution
+LuaSocket toolkit.
+Author: Diego Nehab
+RCS ID: $Id: url.lua,v 1.37 2005/11/22 08:33:29 diego Exp $
+
+parse_query and build_query added for Nmap (Eddie Bell <ejlbell@gmail.com>)
+--]]
+
+-----------------------------------------------------------------------------
+-- Declare module
+-----------------------------------------------------------------------------
+
+local _G = require "_G"
+local stdnse = require "stdnse"
+local string = require "string"
+local table = require "table"
+local idna = require "idna"
+local tableaux = require "tableaux"
+local unicode = require "unicode"
+local unittest = require "unittest"
+local base = _G
+
+
+_ENV = stdnse.module("url", stdnse.seeall)
+
+_VERSION = "URL 1.0"
+
+--[[ Internal functions --]]
+
+-- Turns an array of values into a lookup table: every array member becomes a
+-- key of the result, mapped to the number 1.
+local function make_set(t)
+  local set = {}
+  for _, member in base.ipairs(t) do
+    set[member] = 1
+  end
+  return set
+end
+
+-- Percent-encodes one character: "%" followed by the two-digit upper-case
+-- hexadecimal value of its first byte.
+local function hex_esc (ch)
+  return string.format("%%%02X", string.byte(ch))
+end
+
+-- Characters that are allowed within a path segment, along with alphanumerics.
+-- Indexing the set with any character that is not a member falls through to
+-- the metatable and yields that character's percent-encoded form.
+local segment_set = setmetatable(
+  make_set {
+    "-", "_", ".", "!", "~", "*", "'", "(",
+    ")", ":", "@", "&", "=", "+", "$", ",",
+  },
+  { __index = function(_, ch) return hex_esc(ch) end }
+)
+
+---
+-- Protects a path segment, to prevent it from interfering with the
+-- URL parsing.
+--
+-- Alphanumerics and the characters listed in <code>segment_set</code> are
+-- kept unchanged; every other character is percent-encoded.
+-- @param s Binary string to be encoded.
+-- @return Escaped representation of string.
+local function protect_segment(s)
+  -- segment_set stores the number 1 for each allowed character, so it must not
+  -- be handed to gsub as the replacement table: gsub uses numeric values as
+  -- replacement text and would turn ":" or "@" into "1". Decide per character
+  -- instead; rawget bypasses the set's escaping __index fallback.
+  local protected = string.gsub(s, "([^A-Za-z0-9_.~-])", function (c)
+    if base.rawget(segment_set, c) then
+      return c
+    end
+    return hex_esc(c)
+  end)
+  -- Return the string only, not gsub's substitution count.
+  return protected
+end
+
+---
+-- Builds a path from a base path and a relative path, resolving "." and ".." segments.
+-- @param base_path A base path; the text after its last "/" is replaced when relative_path does not start with "/".
+-- @param relative_path A relative path; when it starts with "/" it is used on its own and base_path is ignored.
+-- @return The resulting path: the surviving segments joined with "/".
+-----------------------------------------------------------------------------
+local function absolute_path(base_path, relative_path)
+ -- Function for normalizing trailing dot and dot-dot by adding the final /
+ local fixdots = function (s)
+ return s:gsub("%f[^/\0]%.$", "./"):gsub("%f[^/\0]%.%.$", "../") -- the frontier only matches at the string start or right after "/" (or a NUL byte)
+ end
+ local path = relative_path
+ if path:sub(1, 1) ~= "/" then
+ -- function wrapper to avoid %-substitution of captures
+ path = fixdots(base_path):gsub("[^/]*$", function() return path end) -- put the relative path in place of the base's last segment
+ end
+ -- Break the path into segments, processing dot and dot-dot
+ local segs = {}
+ for s in fixdots(path):gmatch("[^/]*") do
+ if s == "." then -- ignore
+ elseif s == ".." then -- remove the previous segment
+ if #segs > 1 or (#segs == 1 and segs[#segs] ~= "") then -- a lone empty first segment (the text before a leading "/") is kept
+ table.remove(segs)
+ end
+ else -- add a regular segment, possibly empty
+ table.insert(segs, s)
+ end
+ end
+ return table.concat(segs, "/")
+end
+
+
+--[[ External functions --]]
+
+---
+-- Percent-encodes a string.
+--
+-- Every character other than ASCII letters, digits, "_", ".", "~" and "-" is
+-- passed through <code>hex_esc</code> and replaced by the result.
+-- @param s Binary string to be encoded.
+-- @return Escaped representation of string.
+-----------------------------------------------------------------------------
+function escape(s)
+  local encoded = string.gsub(s, "([^A-Za-z0-9_.~-])", hex_esc)
+  return encoded
+end
+
+
+-- Converts a string of hexadecimal digits into the character with that byte
+-- value.
+local function hex_unesc (hex)
+  local code = base.tonumber(hex, 16)
+  return string.char(code)
+end
+
+---
+-- Decodes a percent-encoded string.
+--
+-- Each "%" followed by two hexadecimal digits is replaced by what
+-- <code>hex_unesc</code> returns for those digits; all other text is left
+-- as it is.
+-- @param s Hexadecimal-encoded string.
+-- @return Decoded string.
+-----------------------------------------------------------------------------
+function unescape(s)
+  local decoded = string.gsub(s, "%%(%x%x)", hex_unesc)
+  return decoded
+end
+
+-- Runs a string through unescape() and then escape(), so that the result is
+-- escaped exactly the way escape() would escape the decoded text.
+local function normalize_escape (s)
+  local decoded = unescape(s)
+  return escape(decoded)
+end
+
+---
+-- Returns an ASCII form of a host's name where one can be computed.
+--
+-- The name is obtained from <code>stdnse.get_hostname</code>. If it contains
+-- any byte in the range 0x80-0xFF it is decoded as UTF-8 and converted with
+-- <code>idna.toASCII</code>; when either step yields nothing, the name is
+-- returned unchanged.
+-- @param host Value handed to <code>stdnse.get_hostname</code>.
+-- @return The converted name, or the original name if no conversion applied.
+function ascii_hostname(host)
+  local hostname = stdnse.get_hostname(host)
+  if not hostname:match("[\x80-\xff]") then
+    -- nothing outside the ASCII range, nothing to convert
+    return hostname
+  end
+  -- TODO: Allow other Unicode encodings
+  local decoded = unicode.decode(hostname, unicode.utf8_dec)
+  local ascii_host = decoded and idna.toASCII(decoded)
+  return ascii_host or hostname
+end
+
+---
+-- Parses a URL and returns a table with all its parts according to RFC 3986.
+--
+-- The following grammar describes the names given to the URL parts.
+-- <code>
+-- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
+-- <authority> ::= <userinfo>@<host>:<port>
+-- <userinfo> ::= <user>[:<password>]
+-- <path> ::= {<segment>/}<segment>
+-- </code>
+--
+-- The leading <code>/</code> in <code>/<path></code> is considered part of
+-- <code><path></code>.
+--
+-- If the host contains non-ASCII characters, the Punycode-encoded version of
+-- the host name will be in the <code>ascii_host</code> field of the returned
+-- table.
+--
+-- @param url URL of request.
+-- @param default Table with default values for each field.
+-- @return A table with the following fields, where RFC naming conventions have
+-- been preserved:
+-- <code>scheme</code>, <code>authority</code>, <code>userinfo</code>,
+-- <code>user</code>, <code>password</code>,
+-- <code>host</code>, <code>ascii_host</code>,
+-- <code>port</code>, <code>path</code>, <code>params</code>,
+-- <code>query</code>, and <code>fragment</code>.
+-- <code>path</code> is always a string (possibly empty). The table also
+-- carries <code>is_folder</code> (true when the path ends in "/") and, for
+-- non-folder paths, <code>extension</code> (the text after the last "." of
+-- the final path segment, if any).
+-----------------------------------------------------------------------------
+function parse(url, default)
+  -- start from a copy of the defaults, if any were given
+  local parsed = {}
+  for i,v in base.pairs(default or parsed) do parsed[i] = v end
+
+  -- normalize percent-escapes: decode the ones that did not need escaping
+  url = string.gsub(url, "%%%x%x", normalize_escape)
+  -- get fragment
+  url = string.gsub(url, "#(.*)$", function(f)
+    parsed.fragment = f
+    return ""
+  end)
+  -- get scheme. Lower-case according to RFC 3986 section 3.1.
+  url = string.gsub(url, "^(%w[%w.+-]*):", function(s)
+    parsed.scheme = string.lower(s)
+    return ""
+  end)
+  -- get authority. RFC 3986 section 3.2: it ends at the next "/" or "?" (the
+  -- fragment is already gone), so "http://host?k=v" must not swallow the
+  -- query into the host name.
+  url = string.gsub(url, "^//([^/?]*)", function(n)
+    parsed.authority = n
+    return ""
+  end)
+  -- get query string
+  url = string.gsub(url, "%?(.*)", function(q)
+    parsed.query = q
+    return ""
+  end)
+  -- get params
+  url = string.gsub(url, "%;(.*)", function(p)
+    parsed.params = p
+    return ""
+  end)
+
+  -- path is whatever was left
+  parsed.path = url
+
+  -- Checks for folder route and extension
+  if parsed.path:sub(-1) == "/" then
+    parsed.is_folder = true
+  else
+    parsed.is_folder = false
+    parsed.extension = parsed.path:match("%.([^/.;]+)%f[;\0][^/]*$")
+  end
+
+  -- Split the authority into userinfo, host and port (port = nil if not used).
+  local authority = parsed.authority
+  if not authority then return parsed end
+  authority = string.gsub(authority, "^([^@]*)@", function(u)
+    parsed.userinfo = u
+    return ""
+  end)
+  authority = string.gsub(authority, ":(%d+)$", function(p)
+    parsed.port = tonumber(p)
+    return ""
+  end)
+  if authority ~= "" then parsed.host = authority end
+  if parsed.host then
+    parsed.ascii_host = ascii_hostname(parsed.host)
+  end
+
+  -- Split userinfo into user and password at the last ":".
+  local userinfo = parsed.userinfo
+  if not userinfo then return parsed end
+  userinfo = string.gsub(userinfo, ":([^:]*)$", function(p)
+    parsed.password = p
+    return ""
+  end)
+  parsed.user = userinfo
+  return parsed
+end
+
+---
+-- Rebuilds a parsed URL from its components.
+--
+-- The path is split with <code>parse_path</code> and reassembled with
+-- <code>build_path</code>, which protects its segments. When
+-- <code>host</code> is set, the authority is composed from
+-- <code>host</code>, <code>port</code> and <code>user</code>/<code>password</code>
+-- (falling back to <code>userinfo</code>); otherwise the stored
+-- <code>authority</code> is used as it is.
+-- @param parsed Parsed URL, as returned by parse.
+-- @return A string with the corresponding URL.
+-----------------------------------------------------------------------------
+function build(parsed)
+  local pieces = {}
+
+  if parsed.scheme then
+    pieces[#pieces + 1] = parsed.scheme .. ":"
+  end
+
+  local authority = parsed.authority
+  if parsed.host then
+    authority = parsed.host
+    if parsed.port then
+      authority = authority .. ":" .. parsed.port
+    end
+    local userinfo = parsed.userinfo
+    if parsed.user then
+      userinfo = parsed.user
+      if parsed.password then
+        userinfo = userinfo .. ":" .. parsed.password
+      end
+    end
+    if userinfo then
+      authority = userinfo .. "@" .. authority
+    end
+  end
+  if authority then
+    pieces[#pieces + 1] = "//" .. authority
+  end
+
+  pieces[#pieces + 1] = build_path(parse_path(parsed.path or ""))
+
+  if parsed.params then
+    pieces[#pieces + 1] = ";" .. parsed.params
+  end
+  if parsed.query then
+    pieces[#pieces + 1] = "?" .. parsed.query
+  end
+  if parsed.fragment then
+    pieces[#pieces + 1] = "#" .. parsed.fragment
+  end
+  return table.concat(pieces)
+end
+
+---
+-- Builds an absolute URL from a base and a relative URL.
+--
+-- A relative URL that has its own scheme is returned unchanged. Otherwise the
+-- scheme comes from the base; if the relative URL has no authority it also
+-- takes the base's authority, and its path is resolved against the base path.
+-- A reference with neither path nor params (such as <code>""</code>,
+-- <code>"?q"</code> or <code>"#frag"</code>) keeps the base path and params,
+-- and keeps the base query unless it supplies its own, as in the reference
+-- resolution examples of RFC 3986 section 5.4.
+-- @param base_url A base URL, as a string or as a table returned by parse.
+-- @param relative_url A relative URL.
+-- @return The corresponding absolute URL.
+-----------------------------------------------------------------------------
+function absolute(base_url, relative_url)
+  local base_parsed
+  if type(base_url) == "table" then
+    base_parsed = base_url
+    base_url = build(base_parsed)
+  else
+    base_parsed = parse(base_url)
+  end
+  local relative_parsed = parse(relative_url)
+
+  if not base_parsed then return relative_url end
+  if not relative_parsed then return base_url end
+  -- a reference that carries its own scheme is already absolute
+  if relative_parsed.scheme then return relative_url end
+
+  relative_parsed.scheme = base_parsed.scheme
+  if not relative_parsed.authority then
+    relative_parsed.authority = base_parsed.authority
+    local rel_path = relative_parsed.path
+    -- parse() reports a missing path as "" rather than nil, and "" is truthy
+    -- in Lua, so the emptiness test has to be explicit. Without it, references
+    -- like "?q" or "#frag" would drop the last segment of the base path.
+    if (rel_path == nil or rel_path == "") and not relative_parsed.params then
+      relative_parsed.path = base_parsed.path
+      relative_parsed.params = base_parsed.params
+      if not relative_parsed.query then
+        relative_parsed.query = base_parsed.query
+      end
+    else
+      relative_parsed.path = absolute_path(base_parsed.path or "",
+        rel_path or "")
+    end
+  end
+  return build(relative_parsed)
+end
+
+---
+-- Breaks a path into its segments, unescaping the segments.
+--
+-- Empty segments are skipped. The result has <code>is_absolute</code> set to
+-- 1 when the path starts with "/" and <code>is_directory</code> set to 1 when
+-- it ends with "/".
+-- @param path A path to break; <code>nil</code> is treated as "".
+-- @return A table with one entry per segment.
+-----------------------------------------------------------------------------
+function parse_path(path)
+  path = path or ""
+  local segments = {}
+  for segment in string.gmatch(path, "[^/]+") do
+    segments[#segments + 1] = unescape(segment)
+  end
+  if string.sub(path, 1, 1) == "/" then segments.is_absolute = 1 end
+  if string.sub(path, -1, -1) == "/" then segments.is_directory = 1 end
+  return segments
+end
+
+---
+-- Builds a path component from its segments, escaping protected characters.
+--
+-- Segments are joined with "/". A leading "/" is added when
+-- <code>parsed.is_absolute</code> is set, and a trailing "/" when
+-- <code>parsed.is_directory</code> is set and there is at least one segment.
+-- @param parsed Path segments.
+-- @param unsafe If true, segments are not protected before path is built.
+-- @return The corresponding path string
+-----------------------------------------------------------------------------
+function build_path(parsed, unsafe)
+  local count = #parsed
+  local out = {}
+  if parsed.is_absolute then out[#out + 1] = "/" end
+  for i = 1, count do
+    local segment = parsed[i]
+    if not unsafe then
+      segment = protect_segment(segment)
+    end
+    out[#out + 1] = segment
+    -- separator between segments, but not after the last one
+    if i < count then out[#out + 1] = "/" end
+  end
+  if count > 0 and parsed.is_directory then out[#out + 1] = "/" end
+  return table.concat(out)
+end
+
+-- HTML entity names that parse_query turns back into literal characters.
+local entities = {
+  ["amp"] = "&",
+  ["lt"] = "<",
+  ["gt"] = ">"
+}
+---
+-- Breaks a query string into name/value pairs.
+--
+-- This function takes a <code><query></code> of the form
+-- <code>"name1=value1&name2=value2"</code>
+-- and returns a table containing the name-value pairs, with the name as the key
+-- and the value as its associated value. Both the name and the value are
+-- subject to URL decoding. The entities <code>&amp;amp;</code>,
+-- <code>&amp;lt;</code> and <code>&amp;gt;</code> are replaced by their
+-- characters before the string is split. A piece without "=" is stored with
+-- the empty string as its value.
+-- @param query Query string.
+-- @return A table of name-value pairs following the pattern
+-- <code>table["name"]</code> = <code>value</code>.
+-----------------------------------------------------------------------------
+function parse_query(query)
+  local parsed = {}
+
+  query = string.gsub(query, "&([ampltg]+);", entities)
+
+  -- The appended "&" terminates the last piece, so one pattern yields every
+  -- "&"-separated piece in order, empty ones included.
+  for piece in string.gmatch(query .. "&", "([^&]*)&") do
+    local eq = string.find(piece, "=", 1, true)
+    local name, value
+    if eq then
+      name, value = string.sub(piece, 1, eq - 1), string.sub(piece, eq + 1)
+    else
+      name, value = piece, ""
+    end
+    parsed[unescape(name)] = unescape(value)
+  end
+  return parsed
+end
+
+---
+-- Builds a query string from a table.
+--
+-- This is the inverse of <code>parse_query</code>. Both the parameter name
+-- and value are subject to URL encoding. The pairs are emitted in the order
+-- in which <code>pairs</code> visits the table.
+-- @param query A dictionary table where <code>table['name']</code> =
+-- <code>value</code>.
+-- @return A query string (like <code>"name1=value1&name2=value2"</code>).
+-----------------------------------------------------------------------------
+function build_query(query)
+  local parts = {}
+  for name, value in pairs(query) do
+    parts[#parts + 1] = escape(name) .. "=" .. escape(value)
+  end
+  return table.concat(parts, "&")
+end
+
+-- Default port of each URI scheme known to this module.
+local get_default_port_ports = {
+  http = 80,
+  https = 443,
+}
+---
+-- Provides the default port for a given URI scheme.
+--
+-- The scheme is lower-cased before the lookup; <code>nil</code> is treated as
+-- the empty string.
+-- @param scheme for determining the port, such as "http" or "https".
+-- @return A port number as an integer, such as 443 for scheme "https",
+-- or nil in case of an undefined scheme
+function get_default_port (scheme)
+  local key = (scheme or ""):lower()
+  return get_default_port_ports[key]
+end
+
+get_default_scheme_schemes = tableaux.invert(get_default_port_ports)
+
+---
+-- Provides the default URI scheme for a given port.
+--
+-- The port number is looked up in <code>get_default_scheme_schemes</code>.
+-- @param port A port number as a number, or a port table whose
+-- <code>number</code> field holds the port number.
+-- @return scheme for addressing the port, such as "http" or "https", or nil
+-- if the table has no entry for the port.
+-----------------------------------------------------------------------------
+function get_default_scheme (port)
+  local number
+  if type(port) == "number" then
+    number = port
+  else
+    number = port.number
+  end
+  return get_default_scheme_schemes[number]
+end
+
+if not unittest.testing() then -- unless unittest.testing() is truthy, stop here and skip the self-tests below
+ return _ENV
+end
+
+test_suite = unittest.TestSuite:new() -- suite that all checks below are added to
+
+local test_urls = { -- per case: _url = input, _res = expected parse() fields, _nil = fields expected to be nil
+ { _url = "https://dummy:pass@example.com:9999/example.ext?k1=v1&k2=v2#fragment=/",
+ _res = {
+ scheme = "https",
+ authority = "dummy:pass@example.com:9999",
+ userinfo = "dummy:pass",
+ user = "dummy",
+ password = "pass",
+ host = "example.com",
+ port = 9999,
+ path = "/example.ext",
+ query = "k1=v1&k2=v2",
+ fragment = "fragment=/",
+ is_folder = false,
+ extension = "ext",
+ },
+ _nil = {"params"}
+ },
+ { _url = "http://dummy@example.com:1234/example.ext/another.php;k1=v1?k2=v2#k3=v3",
+ _res = {
+ scheme = "http",
+ authority = "dummy@example.com:1234",
+ userinfo = "dummy",
+ user = "dummy",
+ host = "example.com",
+ port = 1234,
+ path = "/example.ext/another.php",
+ params = "k1=v1",
+ query = "k2=v2",
+ fragment = "k3=v3",
+ is_folder = false,
+ extension = "php",
+ },
+ _nil = {"password"}
+ },
+ { _url = "//example/example.folder/?k1=v1&k2=v2#k3/v3.bar",
+ _res = {
+ authority = "example",
+ host = "example",
+ path = "/example.folder/",
+ query = "k1=v1&k2=v2",
+ fragment = "k3/v3.bar",
+ is_folder = true,
+ },
+ _nil = {"scheme", "userinfo", "port", "params", "extension"}
+ },
+}
+for _, t in ipairs(test_urls) do
+ local result = parse(t._url)
+ for _, nv in ipairs(t._nil) do -- fields that must be absent from the parse result
+ test_suite:add_test(unittest.is_nil(result[nv]), nv)
+ end
+ for k, v in pairs(t._res) do -- fields that must match exactly; the field name doubles as the test name
+ test_suite:add_test(unittest.equal(result[k], v), k)
+ end
+ test_suite:add_test(unittest.equal(build(t._res), t._url), "build test url") -- build() from the expected fields must give back the URL
+ test_suite:add_test(unittest.equal(build(result), t._url), "parse/build round trip") -- build() from the parse() result must give back the URL
+end
+
+
+-- Path merging tests for compliance with RFC 3986, section 5.2; each row is checked as absolute_path(bpath, rpath) == expected.
+-- https://tools.ietf.org/html/rfc3986#section-5.2
+local absolute_path_tests = { -- {bpath, rpath, expected}
+ {'a', '.', '' },
+ {'a', './', '' },
+ {'..', 'b', 'b' },
+ {'../', 'b', 'b' },
+ {'/', '..', '/' },
+ {'/', '../', '/' },
+ {'/../', '..', '/' },
+ {'/../', '../', '/' },
+ {'a/..', 'b', 'b' },
+ {'a/../', 'b', 'b' },
+ {'/a/..', '', '/' },
+ {'', '/a/..', '/' },
+ {'', '/a//..', '/a/' },
+ }
+for k, v in ipairs(absolute_path_tests) do
+ local bpath, rpath, expected = table.unpack(v)
+ test_suite:add_test(unittest.equal(absolute_path(bpath, rpath), expected),
+ ("absolute_path #%d (%q,%q)"):format(k, bpath, rpath)) -- test name carries the row index and both input paths
+end
+
+return _ENV; -- hand the module table (now including test_suite) back to the loader