Diffstat
-rw-r--r--  scripts/http-sitemap-generator.nse  179
1 files changed, 179 insertions, 0 deletions
diff --git a/scripts/http-sitemap-generator.nse b/scripts/http-sitemap-generator.nse
new file mode 100644
index 0000000..bb2f8f5
--- /dev/null
+++ b/scripts/http-sitemap-generator.nse
@@ -0,0 +1,179 @@
+description = [[
+Spiders a web server and displays its directory structure along with
+number and types of files in each folder. Note that files listed as
+having an 'Other' extension are ones that have no extension or that
+are a root document.
+]]
+
+---
+-- @usage
+-- nmap --script http-sitemap-generator -p 80 <host>
+--
+--
+-- @output
+-- PORT   STATE SERVICE REASON
+-- 80/tcp open  http    syn-ack
+-- | http-sitemap-generator:
+-- |   Directory structure:
+-- |     /
+-- |       Other: 1
+-- |     /images/
+-- |       png: 1
+-- |     /shared/css/
+-- |       css: 1
+-- |     /shared/images/
+-- |       gif: 1; png: 1
+-- |   Longest directory structure:
+-- |     Depth: 2
+-- |     Dir: /shared/css/
+-- |   Total files found (by extension):
+-- |_    Other: 1; css: 1; gif: 1; png: 2
+--
+-- @args http-sitemap-generator.maxdepth the maximum number of directories beneath
+--       the initial url to spider. A negative value disables the limit.
+--       (default: 3)
+-- @args http-sitemap-generator.maxpagecount the maximum number of pages to visit.
+--       A negative value disables the limit (default: 20)
+-- @args http-sitemap-generator.url the url to start spidering. This is a URL
+--       relative to the scanned host, e.g. /default.html (default: /)
+-- @args http-sitemap-generator.withinhost only spider URLs within the same host.
+--       (default: true)
+-- @args http-sitemap-generator.withindomain only spider URLs within the same
+--       domain. This widens the scope from <code>withinhost</code> and cannot
+--       be used in combination with it. (default: false)
+--
+
+author = "Piotr Olma"
+license = "Same as Nmap--See https://nmap.org/book/man-legal.html"
+categories = {"discovery", "intrusive"}
+
+local shortport = require 'shortport'
+local stdnse = require 'stdnse'
+local url = require 'url'
+local httpspider = require 'httpspider'
+local string = require 'string'
+local table = require 'table'
+
+portrule = shortport.port_or_service( {80, 443}, {"http", "https"}, "tcp", "open")
+
+-- increments the count stored under d[k][v], creating the nested table as needed
+local function dict_add(d, k, v)
+  if not d[k] then
+    d[k] = {}
+    d[k][v] = 1
+  elseif d[k][v] then
+    d[k][v] = d[k][v]+1
+  else
+    d[k][v] = 1
+  end
+end
+
+local function map(f, t)
+  local new_t = {}
+  for _,v in ipairs(t) do
+    new_t[#new_t+1] = f(v)
+  end
+  return new_t
+end
+
+local function sort_dirs(t)
+  local keys_table = {}
+  for k,_ in pairs(t) do
+    keys_table[#keys_table+1] = k
+  end
+  table.sort(keys_table)
+  local newdirs = {}
+  map(function(d) newdirs[#newdirs+1]={d, t[d]} end, keys_table)
+  return newdirs
+end
+
+local function sort_by_keys(t)
+  local keys_table = {}
+  for k,_ in pairs(t) do
+    keys_table[#keys_table+1] = k
+  end
+  table.sort(keys_table)
+  return map(function(e) return e..": "..tostring(t[e]) end, keys_table)
+end
+
+local function internal_table_to_output(t)
+  local output = {}
+  for _,dir in ipairs(t) do
+    local ext_and_occurrences = sort_by_keys(dir[2])
+    output[#output+1] = {name=dir[1], table.concat(ext_and_occurrences, "; ")}
+  end
+  return output
+end
+
+local function get_file_extension(f)
+  return string.match(f, ".-/.-%.([^/%.]*)$") or "Other"
+end
+
+-- removes /../ and /./ from paths; for example
+-- normalize_path("/a/v/../../da/as/d/a/a/aa/../") -> "/da/as/d/a/a/"
+local function normalize_path(p)
+  local n=0
+  p = p:gsub("/%.%f[/]", "")
+  p = p:gsub("/%.$", "/")
+  repeat
+    p, n = string.gsub(p, "/[^/]-/%.%.", "")
+  until n==0
+  return p
+end
+
+function action(host, port)
+  local starting_url = stdnse.get_script_args('http-sitemap-generator.url') or "/"
+
+  -- create a new crawler instance
+  local crawler = httpspider.Crawler:new( host, port, nil, { scriptname = SCRIPT_NAME, noblacklist=true, useheadfornonwebfiles=true } )
+
+  if ( not(crawler) ) then
+    return
+  end
+
+  local visited = {}
+  local dir_structure = {}
+  local total_ext = {}
+  local longest_dir_structure = {dir="/", depth=0}
+  while(true) do
+    local status, r = crawler:crawl()
+
+    if ( not(status) ) then
+      if ( r.err ) then
+        return stdnse.format_output(false, r.reason)
+      else
+        break
+      end
+    end
+    if r.response.status and r.response.status == 200 then
+      -- check if we've already visited this file
+      local path = normalize_path(r.url.path)
+      if not visited[path] then
+        local ext = get_file_extension(path)
+        if total_ext[ext] then total_ext[ext]=total_ext[ext]+1 else total_ext[ext]=1 end
+        local dir = normalize_path(r.url.dir)
+        local _,dir_depth = string.gsub(dir,"/","/")
+        -- check if this path is the longest one
+        dir_depth = dir_depth - 1 -- don't count the leading '/'
+        if dir_depth > longest_dir_structure["depth"] then
+          longest_dir_structure["dir"] = dir
+          longest_dir_structure["depth"] = dir_depth
+        end
+        dict_add(dir_structure, dir, ext)
+        -- when withinhost=false, it might be useful to include the full url
+        -- with each path listed in the output
+        visited[path] = true
+      end
+    end
+  end
+
+  local out = internal_table_to_output(sort_dirs(dir_structure))
+  local tot = sort_by_keys(total_ext)
+  out =
+  {
+    "Directory structure:", out,
+    {name="Longest directory structure:", "Depth: "..tostring(longest_dir_structure.depth), "Dir: "..longest_dir_structure.dir},
+    {name="Total files found (by extension):", table.concat(tot, "; ")}
+  }
+  return stdnse.format_output(true, out)
+end
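The least obvious parts of the script are the two path helpers: normalize_path, which collapses /./ and /dir/../ segments so the same resource is only counted once, and get_file_extension, which returns "Other" for paths with no extension (including root documents). The following is a minimal standalone Lua sketch, not part of the patch above; the helper bodies are copied verbatim from the script, while the print calls and expected outputs are illustrative only.

-- standalone sketch: exercise the path helpers outside of Nmap
local function get_file_extension(f)
  return string.match(f, ".-/.-%.([^/%.]*)$") or "Other"
end

local function normalize_path(p)
  local n=0
  p = p:gsub("/%.%f[/]", "")      -- drop "/." segments followed by "/"
  p = p:gsub("/%.$", "/")         -- drop a trailing "/."
  repeat                          -- repeatedly collapse "/dir/.." pairs
    p, n = string.gsub(p, "/[^/]-/%.%.", "")
  until n==0
  return p
end

print(normalize_path("/a/v/../../da/as/d/a/a/aa/../")) --> /da/as/d/a/a/
print(normalize_path("/images/./logo.png"))            --> /images/logo.png
print(get_file_extension("/images/logo.png"))          --> png
print(get_file_extension("/"))                         --> Other (root document)

The crawler limits documented under @args are passed with Nmap's standard --script-args syntax, for example: nmap --script http-sitemap-generator --script-args http-sitemap-generator.maxdepth=5,http-sitemap-generator.maxpagecount=200 -p 80 <host>.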