description = [[
Spiders a web server and displays its directory structure along with
number and types of files in each folder. Note that files listed as having
an 'Other' extension are ones that have no extension or that are a root
document.
]]

---
-- @usage
-- nmap --script http-sitemap-generator -p 80 <host>
--
--
-- @output
-- PORT   STATE SERVICE REASON
-- 80/tcp open  http    syn-ack
-- | http-sitemap-generator:
-- |   Directory structure:
-- |     /
-- |       Other: 1
-- |     /images/
-- |       png: 1
-- |     /shared/css/
-- |       css: 1
-- |     /shared/images/
-- |       gif: 1; png: 1
-- |   Longest directory structure:
-- |     Depth: 2
-- |     Dir: /shared/css/
-- |   Total files found (by extension):
-- |_    Other: 1; css: 1; gif: 1; png: 2
--
-- @args http-sitemap-generator.maxdepth the maximum amount of directories beneath
--       the initial url to spider. A negative value disables the limit.
--       (default: 3)
-- @args http-sitemap-generator.maxpagecount the maximum amount of pages to visit.
--       A negative value disables the limit (default: 20)
-- @args http-sitemap-generator.url the url to start spidering. This is a URL
--       relative to the scanned host eg. /default.html (default: /)
-- @args http-sitemap-generator.withinhost only spider URLs within the same host.
--       (default: true)
-- @args http-sitemap-generator.withindomain only spider URLs within the same
--       domain. This widens the scope from withinhost and can
--       not be used in combination. (default: false)
--

author = "Piotr Olma"
license = "Same as Nmap--See https://nmap.org/book/man-legal.html"
categories = {"discovery", "intrusive"}

local shortport = require 'shortport'
local stdnse = require 'stdnse'
local url = require 'url'
local httpspider = require 'httpspider'
local string = require 'string'
local table = require 'table'

portrule = shortport.port_or_service( {80, 443}, {"http", "https"}, "tcp", "open")

-- Increments the counter stored at d[k][v], creating the inner table and the
-- counter itself on first use.
-- @param d table of tables of counters
-- @param k outer key (a directory in this script)
-- @param v inner key (a file extension in this script)
local function dict_add(d, k, v)
  local counts = d[k]
  if not counts then
    counts = {}
    d[k] = counts
  end
  counts[v] = (counts[v] or 0) + 1
end

-- Returns a new array holding f(v) for every element v of the array t.
-- @param f function applied to each element
-- @param t array to traverse (with ipairs)
-- @return array of results
local function map(f, t)
  local new_t = {}
  for _, v in ipairs(t) do
    new_t[#new_t+1] = f(v)
  end
  return new_t
end

-- Returns the keys of t as an array sorted with table.sort's default order.
-- @param t table whose keys are mutually comparable (strings in this script)
-- @return sorted array of keys
local function sorted_keys(t)
  local keys = {}
  for k in pairs(t) do
    keys[#keys+1] = k
  end
  table.sort(keys)
  return keys
end

-- Turns a table keyed by directory into an array of {directory, value} pairs
-- ordered by directory name.
-- @param t table mapping directory -> value
-- @return array of two-element arrays {directory, t[directory]}
local function sort_dirs(t)
  local newdirs = {}
  for _, d in ipairs(sorted_keys(t)) do
    newdirs[#newdirs+1] = {d, t[d]}
  end
  return newdirs
end

-- Renders every entry of t as the string "key: value", ordered by key.
-- @param t table mapping key -> value
-- @return array of "key: value" strings
local function sort_by_keys(t)
  return map(function(e) return e..": "..tostring(t[e]) end, sorted_keys(t))
end

-- Converts the array produced by sort_dirs into the nested table layout
-- handed to stdnse.format_output: one sub-table per directory, named after
-- the directory and holding a single "ext: n; ext: n" line.
-- @param t array of {directory, extension_counts} pairs
-- @return array of output tables
local function internal_table_to_output(t)
  local output = {}
  for _, dir in ipairs(t) do
    local ext_and_occurrences = sort_by_keys(dir[2])
    output[#output+1] = {name=dir[1], table.concat(ext_and_occurrences, "; ")}
  end
  return output
end

-- Returns the extension of the last path segment of f, or "Other" when the
-- segment has no extension. The capture must be non-empty, so a path that
-- merely ends in a dot (e.g. "/foo.") is counted as "Other" rather than
-- producing an empty extension name in the report.
-- @param f path string
-- @return extension without the dot, or "Other"
local function get_file_extension(f)
  return string.match(f, ".-/.-%.([^/%.]+)$") or "Other"
end

-- removes /../ and /./ from paths; for example
-- normalize_path("/a/v/../../da/as/d/a/a/aa/../") -> "/da/as/d/a/a/"
-- @param p path string
-- @return normalized path string
local function normalize_path(p)
  local n = 0
  -- drop "/." segments that are followed by another "/"
  p = p:gsub("/%.%f[/]", "")
  -- a trailing "/." collapses to "/"
  p = p:gsub("/%.$", "/")
  -- Repeatedly drop "/segment/.." pairs until none remain. The frontier
  -- %f[/\0] requires ".." to be a whole segment (followed by "/" or the end
  -- of the string), so names that merely start with two dots, such as
  -- "/a/..b", are left untouched.
  repeat
    p, n = string.gsub(p, "/[^/]-/%.%.%f[/\0]", "")
  until n == 0
  return p
end

-- Crawls the target with httpspider and reports, for every page answered
-- with HTTP status 200, the directory it lives in and its file extension.
-- @param host host table supplied by NSE
-- @param port port table supplied by NSE
-- @return formatted script output, or nil if the crawler could not be created
function action(host, port)
  -- Read as in the original script but not used below.
  local starting_url = stdnse.get_script_args('http-sitemap-generator.url') or "/"

  -- create a new crawler instance
  local crawler_options = {
    scriptname = SCRIPT_NAME,
    noblacklist = true,
    useheadfornonwebfiles = true,
  }
  local crawler = httpspider.Crawler:new(host, port, nil, crawler_options)

  if ( not(crawler) ) then
    return
  end

  local visited = {}        -- normalized paths already counted
  local dir_structure = {}  -- directory -> { extension -> count }
  local total_ext = {}      -- extension -> count over all directories
  local longest_dir_structure = {dir="/", depth=0}

  while(true) do
    local status, r = crawler:crawl()

    if ( not(status) ) then
      if ( r.err ) then
        return stdnse.format_output(false, r.reason)
      else
        break
      end
    end

    local response = r.response
    if response and response.status == 200 then
      --check if we've already visited this file
      local path = normalize_path(r.url.path)
      if not visited[path] then
        local ext = get_file_extension(path)
        total_ext[ext] = (total_ext[ext] or 0) + 1

        local dir = normalize_path(r.url.dir)
        -- gsub's second result is the number of "/" characters in dir
        local _, dir_depth = string.gsub(dir, "/", "/")
        -- check if this path is the longest one
        dir_depth = dir_depth - 1 -- first '/'
        if dir_depth > longest_dir_structure["depth"] then
          longest_dir_structure["dir"] = dir
          longest_dir_structure["depth"] = dir_depth
        end

        dict_add(dir_structure, dir, ext)
        -- when withinhost=false, then maybe we'd like to include the full url
        -- with each path listed in the output
        visited[path] = true
      end
    end
  end

  local out = internal_table_to_output(sort_dirs(dir_structure))
  local tot = sort_by_keys(total_ext)
  out = {
    "Directory structure:",
    out,
    {
      name="Longest directory structure:",
      "Depth: "..tostring(longest_dir_structure.depth),
      "Dir: "..longest_dir_structure.dir
    },
    {
      name="Total files found (by extension):",
      table.concat(tot, "; ")
    }
  }
  return stdnse.format_output(true, out)
end