description = [[
Spiders a web server and displays its directory structure along with
the number and types of files in each folder. Note that files listed
as having an 'Other' extension are ones that have no extension or
are a root document.
]]

---
-- @usage
-- nmap --script http-sitemap-generator -p 80 <host>
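-- nmap --script http-sitemap-generator --script-args http-sitemap-generator.maxpagecount=50 -p 80 <host>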
--
-- @output
-- PORT   STATE SERVICE REASON
-- 80/tcp open  http    syn-ack
-- | http-sitemap-generator:
-- |   Directory structure:
-- |     /
-- |       Other: 1
-- |     /images/
-- |       png: 1
-- |     /shared/css/
-- |       css: 1
-- |     /shared/images/
-- |       gif: 1; png: 1
-- |   Longest directory structure:
-- |     Depth: 2
-- |     Dir: /shared/css/
-- |   Total files found (by extension):
-- |_    Other: 1; css: 1; gif: 1; png: 2
--
-- @args http-sitemap-generator.maxdepth the maximum number of directories beneath
--       the initial URL to spider. A negative value disables the limit.
--       (default: 3)
-- @args http-sitemap-generator.maxpagecount the maximum number of pages to visit.
--       A negative value disables the limit. (default: 20)
-- @args http-sitemap-generator.url the URL to start spidering. This is a URL
--       relative to the scanned host, e.g. /default.html (default: /)
-- @args http-sitemap-generator.withinhost only spider URLs within the same host.
--       (default: true)
-- @args http-sitemap-generator.withindomain only spider URLs within the same
--       domain. This widens the scope from <code>withinhost</code> and cannot
--       be used in combination with it. (default: false)
--

author = "Piotr Olma"
license = "Same as Nmap--See https://nmap.org/book/man-legal.html"
categories = {"discovery", "intrusive"}

local httpspider = require "httpspider"
local shortport = require "shortport"
local stdnse = require "stdnse"
local string = require "string"
local table = require "table"

portrule = shortport.port_or_service( {80, 443}, {"http", "https"}, "tcp", "open")

-- Counts an occurrence of value v under key k in the nested table d.
local function dict_add(d, k, v)
  d[k] = d[k] or {}
  d[k][v] = (d[k][v] or 0) + 1
end
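-- For illustration: calling dict_add(d, "/images/", "png") twice leaves
-- d["/images/"] as { png = 2 }.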

-- Applies f to each element of the array t and returns the results as a new array.
local function map(f, t)
  local new_t = {}
  for _,v in ipairs(t) do
    new_t[#new_t+1] = f(v)
  end
  return new_t
end
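-- For illustration: map(tostring, {1, 2}) returns {"1", "2"}.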

-- Returns the directory table as an array of {dirname, extension_counts}
-- pairs, sorted by directory name.
local function sort_dirs(t)
  local keys_table = {}
  for k in pairs(t) do
    keys_table[#keys_table+1] = k
  end
  table.sort(keys_table)
  return map(function(d) return {d, t[d]} end, keys_table)
end
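-- For illustration: sort_dirs({["/b/"]={png=1}, ["/a/"]={css=2}}) returns
-- { {"/a/", {css=2}}, {"/b/", {png=1}} }.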

-- Returns a sorted array of "key: value" strings built from the table t.
local function sort_by_keys(t)
  local keys_table = {}
  for k,_ in pairs(t) do
    keys_table[#keys_table+1] = k
  end
  table.sort(keys_table)
  return map(function(e) return e..": "..tostring(t[e]) end, keys_table)
end
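-- For illustration: sort_by_keys({png=2, css=1}) returns {"css: 1", "png: 2"}.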

-- Converts the sorted directory array into an NSE output table: one named
-- entry per directory, with its extension counts joined by "; ".
local function internal_table_to_output(t)
  local output = {}
  for _,dir in ipairs(t) do
    local ext_and_occurrences = sort_by_keys(dir[2])
    output[#output+1] = {name=dir[1], table.concat(ext_and_occurrences, "; ")}
  end
  return output
end
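-- Each entry ends up as e.g. { name = "/images/", "gif: 1; png: 2" }, which
-- stdnse.format_output renders as a named subsection.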

-- Extracts the extension of the last path segment; returns "Other" when the
-- segment has no extension (e.g. a directory or a root document).
local function get_file_extension(f)
  return string.match(f, ".-/.-%.([^/%.]*)$") or "Other"
end
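-- For illustration: get_file_extension("/images/logo.png") returns "png",
-- while get_file_extension("/index") returns "Other".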

-- removes /../ and /./ from paths; for example
-- normalize_path("/a/v/../../da/as/d/a/a/aa/../") -> "/da/as/d/a/a/"
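-- It also drops bare "/." segments, e.g. normalize_path("/a/./b/") -> "/a/b/".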
local function normalize_path(p)
  local n=0
  p = p:gsub("/%.%f[/]", "")
  p = p:gsub("/%.$", "/")
  repeat
    p, n = string.gsub(p, "/[^/]-/%.%.", "")
  until n==0
  return p
end

function action(host, port)
  local starting_url = stdnse.get_script_args('http-sitemap-generator.url') or "/"

  -- create a new crawler instance
  local crawler = httpspider.Crawler:new(  host, port, nil, { scriptname = SCRIPT_NAME, noblacklist=true, useheadfornonwebfiles=true } )

  if ( not(crawler) ) then
    return
  end

  local visited = {}
  local dir_structure = {}
  local total_ext = {}
  local longest_dir_structure = {dir="/", depth=0}
  while(true) do
    local status, r = crawler:crawl()

    if ( not(status) ) then
      if ( r.err ) then
        return stdnse.format_output(false, r.reason)
      else
        break
      end
    end
    if r.response.status == 200 then
      -- skip files we have already visited
      local path = normalize_path(r.url.path)
      if not visited[path] then
        local ext = get_file_extension(path)
        total_ext[ext] = (total_ext[ext] or 0) + 1
        local dir = normalize_path(r.url.dir)
        -- the directory depth is the number of '/' characters minus the
        -- leading one; e.g. "/shared/css/" has depth 2
        local _, dir_depth = string.gsub(dir, "/", "/")
        dir_depth = dir_depth - 1
        if dir_depth > longest_dir_structure["depth"] then
          longest_dir_structure["dir"] = dir
          longest_dir_structure["depth"] = dir_depth
        end
        dict_add(dir_structure, dir, ext)
        -- when withinhost=false, it might be useful to include the full URL
        -- with each path listed in the output
        visited[path] = true
      end
    end
  end

  local out = internal_table_to_output(sort_dirs(dir_structure))
  local tot = sort_by_keys(total_ext)
  out =
  {
    "Directory structure:", out,
    {name="Longest directory structure:", "Depth: "..tostring(longest_dir_structure.depth), "Dir: "..longest_dir_structure.dir},
    {name="Total files found (by extension):", table.concat(tot, "; ")}
  }
  return stdnse.format_output(true, out)
end