-- File: scripts/http-useragent-tester.nse
-- Blob: d6e0acabcf751b3c4acd96a934f8e1b4b0669897
-- Human-readable summary of what this script checks.
description = [[
Checks if various crawling utilities are allowed by the host.
]]

---
-- @usage nmap -p80 --script http-useragent-tester.nse <host>
--
-- This script sets various User-Agent headers that are used by different
-- utilities and crawling libraries (for example cURL or Wget). If a request is
-- redirected to a different page than a (valid) browser request would reach,
-- the utility is considered banned. A response whose status code differs from
-- the browser's is reported separately.
--
-- @args http-useragent-tester.useragents A table with more User-Agent headers.
--       Default: nil
--
-- @output
-- PORT   STATE SERVICE REASON
-- 80/tcp open  http    syn-ack
-- | http-useragent-tester:
-- |   Status for browser useragent: 200
-- |   Redirected To: https://www.example.com/
-- |   Allowed User Agents:
-- |     Mozilla/5.0 (compatible; Nmap Scripting Engine; https://nmap.org/book/nse.html)
-- |     libwww
-- |     lwp-trivial
-- |     libcurl-agent/1.0
-- |     PHP/
-- |     GT::WWW
-- |     Snoopy
-- |     MFC_Tear_Sample
-- |     HTTP::Lite
-- |     PHPCrawl
-- |     URI::Fetch
-- |     Zend_Http_Client
-- |     http client
-- |     PECL::HTTP
-- |     WWW-Mechanize/1.34
-- |   Change in Status Code:
-- |     Python-urllib/2.5: 403
-- |_    Wget/1.13.4 (linux-gnu): 403
--
-- @xmloutput
-- <elem key="Status for browser useragent">200</elem>
-- <elem key="Redirected To">https://www.example.com/</elem>
-- <table key="Allowed User Agents">
--   <elem>Mozilla/5.0 (compatible; Nmap Scripting Engine;
--   https://nmap.org/book/nse.html)</elem>
--   <elem>libwww</elem>
--   <elem>lwp-trivial</elem>
--   <elem>libcurl-agent/1.0</elem>
--   <elem>PHP/</elem>
--   <elem>GT::WWW</elem>
--   <elem>Snoopy</elem>
--   <elem>MFC_Tear_Sample</elem>
--   <elem>HTTP::Lite</elem>
--   <elem>PHPCrawl</elem>
--   <elem>URI::Fetch</elem>
--   <elem>Zend_Http_Client</elem>
--   <elem>http client</elem>
--   <elem>PECL::HTTP</elem>
--   <elem>WWW-Mechanize/1.34</elem>
-- </table>
-- <table key="Change in Status Code">
--   <elem key="Python-urllib/2.5">403</elem>
--   <elem key="Wget/1.13.4 (linux-gnu)">403</elem>
-- </table>
---

-- Script metadata: who wrote it, under which terms it is distributed, and the
-- categories it is filed under.
author = "George Chatzisofroniou"
license = "Same as Nmap--See https://nmap.org/book/man-legal.html"
categories = {
  "discovery",
  "safe",
}

local http = require "http"
local target = require "target"
local httpspider = require "httpspider"
local shortport = require "shortport"
local stdnse = require "stdnse"
local table = require "table"
local url = require "url"

---
-- Requests "/" with the given User-Agent header and reports where the request
-- ended up.
--
-- The request options ask the http library to skip its cache (no_cache and
-- bypass_cache) and install a redirect validator that approves at most three
-- redirects per request.
--
-- @param host Host table, forwarded unchanged to http.get.
-- @param port Port table, forwarded unchanged to http.get.
-- @param useragent String sent as the User-Agent header.
-- @return The last entry of the response's location list, or false when the
--         response carries no location list.
-- @return The status field of the response returned by http.get (may be nil).
getLastLoc = function(host, port, useragent)

  -- Factory handed to the http library: every call yields a fresh validator
  -- with its own budget of three approvals.
  local function limited_redirects()
    local remaining = 3
    return function()
      if remaining == 0 then
        return false
      end
      remaining = remaining - 1
      return true
    end
  end

  local options = {
    header = { ["User-Agent"] = useragent },
    no_cache = true,
    bypass_cache = true,
    redirect_ok = limited_redirects,
  }

  -- The agent is passed as a format argument so that any '%' it contains is
  -- not taken as part of the format string.
  stdnse.debug2("Making a request with User-Agent: %s", useragent)

  local response = http.get(host, port, '/', options)
  local redirects = response.location
  if redirects then
    -- Exactly two values are returned on both paths so callers can compare
    -- results from redirected and non-redirected requests directly.
    return redirects[#redirects] or false, response.status
  end

  return false, response.status

end

-- Port selection is delegated to shortport.port_or_service, given port numbers
-- 80 and 443, service names http and https, protocol tcp and state open.
portrule = shortport.port_or_service(
  { 80, 443 },
  { "http", "https" },
  "tcp",
  "open"
)

---
-- Script entry point: requests "/" once with a browser-like User-Agent and
-- once with every tested User-Agent, then classifies each tested agent by how
-- its result differs from the browser's.
--
-- An agent is listed as forbidden/redirected when its request ends at a
-- location different from the browser's, as having a changed status when only
-- the status differs, and as allowed otherwise.
--
-- @param host Host table, forwarded to getLastLoc and the crawler.
-- @param port Port table, forwarded to getLastLoc and the crawler.
-- @return Output table holding "Status for browser useragent" and, when they
--         are non-empty, "Redirected To", "Allowed User Agents",
--         "Forbidden/Redirected User Agents" and "Change in Status Code".
action = function(host, port)

  local moreagents = stdnse.get_script_args("http-useragent-tester.useragents")
  local newtargets = stdnse.get_script_args("newtargets")
  local output = stdnse.output_table()

  -- We don't crawl any site. We initialize a crawler to use its iswithinhost method.
  local crawler = httpspider.Crawler:new(host, port, '/', { scriptname = SCRIPT_NAME } )

  -- Reference User-Agent: every tested agent is compared against its result.
  local browser_agent = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17"

  local HTTPlibs = {
    http.USER_AGENT,
    "libwww",
    "lwp-trivial",
    "libcurl-agent/1.0",
    "PHP/",
    "Python-urllib/2.5",
    "GT::WWW",
    "Snoopy",
    "MFC_Tear_Sample",
    "HTTP::Lite",
    "PHPCrawl",
    "URI::Fetch",
    "Zend_Http_Client",
    "http client",
    "PECL::HTTP",
    "Wget/1.13.4 (linux-gnu)",
    "WWW-Mechanize/1.34"
  }

  -- NOTE(review): a single value given for the script argument presumably
  -- arrives as a plain string rather than a table; wrap it so that it is
  -- tested instead of being silently skipped by ipairs.
  if type(moreagents) == "string" then
    moreagents = { moreagents }
  end
  if type(moreagents) == "table" then
    for _, agent in ipairs(moreagents) do
      HTTPlibs[#HTTPlibs + 1] = agent
    end
  end

  -- We perform a normal browser request and get the returned location
  local loc, status = getLastLoc(host, port, browser_agent)
  output['Status for browser useragent'] = status

  if loc then
    output['Redirected To'] = loc
  end

  local allowed, forb, status_changed = {}, {}, {}

  for _, agent in ipairs(HTTPlibs) do

    local libloc, libstatus = getLastLoc(host, port, agent)

    if libloc and loc ~= libloc then
      -- The library's request ended somewhere else than the browser's, hence
      -- the agent is treated as forbidden.
      local parsed = url.parse(libloc)
      local redirect_host = parsed and parsed.host
      local entry = {}
      -- A location without a host part (e.g. a relative path) cannot point to
      -- another host, so it is reported as a same-host redirect and never
      -- handed to the crawler or added as a target.
      if redirect_host and not crawler:iswithinhost(redirect_host) then
        entry['Different Host'] = tostring(libloc)
        if newtargets then
          target.add(redirect_host)
        end
      else
        entry['Same Host'] = tostring(libloc)
      end
      forb[agent] = entry
    elseif (status or false) ~= (libstatus or false) then
      -- nil and false both mean "no status"; normalising them keeps two failed
      -- requests from counting as a change. A missing status is stored as text
      -- because assigning nil would drop the agent from the report.
      status_changed[agent] = libstatus or "No response"
    else
      allowed[#allowed + 1] = agent
    end

  end

  if next(allowed) ~= nil then
    output['Allowed User Agents'] = allowed
  end

  if next(forb) ~= nil then
    output['Forbidden/Redirected User Agents'] = forb
  end

  if next(status_changed) ~= nil then
    output['Change in Status Code'] = status_changed
  end

  return output

end