summaryrefslogtreecommitdiffstats
path: root/nselib/lpeg-utility.lua
blob: 47134bc1fc104db7ad587e02175e332259c59ce2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
---
-- Utility functions for LPeg.
--
-- @copyright Same as Nmap--See https://nmap.org/book/man-legal.html
-- @class module
-- @name lpeg-utility

local assert = assert

local lpeg = require "lpeg"
local stdnse = require "stdnse"
local pairs = pairs
local string = require "string"
local tonumber = tonumber
local rawset = rawset
local lower = string.lower
local upper = string.upper

_ENV = {}

local caselessP = lpeg.Cf((lpeg.P(1) / function (a) return lpeg.S(lower(a)..upper(a)) end)^1, function (a, b) return a * b end)
---
-- Returns a pattern which matches the literal string caselessly.
--
-- @param literal A literal string to match case-insensitively.
-- @return An LPeg pattern.
function caseless (literal)
  return assert(caselessP:match(literal))
end

---
-- Returns a pattern which matches the input pattern anywhere on a subject string.
--
-- @param patt Input pattern.
-- @return An LPeg pattern.
function anywhere (patt)
  return lpeg.P {
    patt + 1 * lpeg.V(1)
  }
end

---
-- Adds the current locale from lpeg.locale() to the grammar and returns the final pattern.
--
-- @param grammar Input grammar.
-- @return An LPeg pattern.
function localize (grammar)
  return lpeg.P(lpeg.locale(grammar))
end

---
-- Splits the input string on the input separator.
--
-- @param str Input string to split.
-- @param sep Input string/pattern to separate on.
-- @return All splits.
function split (str, sep)
  return lpeg.P {
    lpeg.V "elem" * (lpeg.V "sep" * lpeg.V "elem")^0,
    elem = lpeg.C((1 - lpeg.V "sep")^0),
    sep = sep,
  } :match(str)
end

---
-- Returns a pattern which only matches at a word boundary (beginning).
--
-- Essentially the same as '\b' in a PCRE pattern.
--
-- @param patt A pattern.
-- @return A new LPeg pattern.
function atwordboundary (patt)
  return _ENV.localize {
    patt + lpeg.V "alpha"^0 * (1 - lpeg.V "alpha")^1 * lpeg.V(1)
  }
end

---
-- Returns a pattern which captures the contents of a quoted string.
--
-- This can handle embedded escaped quotes, and captures the unescaped string.
--
-- @param quot The quote character to use. Default: '"'
-- @param esc The escape character to use. Cannot be the same as quot. Default: "\"
function escaped_quote (quot, esc)
  quot = quot or '"'
  esc = esc or '\\'
  return lpeg.P {
    lpeg.Cs(lpeg.V "quot" * lpeg.Cs((lpeg.V "simple_char" + lpeg.V "noesc" + lpeg.V "unesc")^0) * lpeg.V "quot"),
    quot = lpeg.P(quot)/"",
    esc = lpeg.P(esc),
    simple_char = (lpeg.P(1) - (lpeg.V "quot" + lpeg.V "esc")),
    unesc = (lpeg.V "esc" * lpeg.C( lpeg.V "esc" + lpeg.P(quot) ))/"%1",
    noesc = lpeg.V "esc" * lpeg.V "simple_char"
  }
end

---
-- Adds hooks to a grammar to print debugging information
--
-- Debugging LPeg grammars can be difficult. Calling this function on your
-- grammmar will cause it to print ENTER and LEAVE statements for each rule, as
-- well as position and subject after each successful rule match.
--
-- For convenience, the modified grammar is returned; a copy is not made
-- though, and the original grammar is modified as well.
--
-- @param grammar The LPeg grammar to modify
-- @param printer A printf-style formatting printer function to use.
--                Default: stdnse.debug1
-- @return The modified grammar.
function debug (grammar, printer)
  printer = printer or stdnse.debug1
  -- Original code credit: http://lua-users.org/lists/lua-l/2009-10/msg00774.html
  for k, p in pairs(grammar) do
    local enter = lpeg.Cmt(lpeg.P(true), function(s, p, ...)
      printer("ENTER %s", k) return p end)
    local leave = lpeg.Cmt(lpeg.P(true), function(s, p, ...)
      printer("LEAVE %s", k) return p end) * (lpeg.P("k") - lpeg.P "k");
    grammar[k] = lpeg.Cmt(enter * p + leave, function(s, p, ...)
      printer("---%s---", k) printer("pos: %d, [%s]", p, s:sub(1, p-1)) return p end)
  end
  return grammar
end

do
  -- Cache the returned pattern
  local getquote = escaped_quote()

  -- Substitution pattern to unescape a string
  local unescape = lpeg.P {
    -- Substitute captures
    lpeg.Cs((lpeg.V "simple_char" + lpeg.V "unesc")^0),
    -- Escape char is '\'
    esc = lpeg.P "\\",
    -- Simple char is anything but escape char
    simple_char = lpeg.P(1) - lpeg.V "esc",
    -- If we hit an escape, process specials or hex code, otherwise remove the escape
    unesc = (lpeg.V "esc" * lpeg.Cs( lpeg.V "specials" + lpeg.V "code" + lpeg.P(1) ))/"%1",
    -- single-char escapes. These are the only ones service_scan uses
    specials = lpeg.S "trn0" / {t="\t", r="\r", n="\n", ["0"]="\0"},
    -- hex escape: convert to char
    code = (lpeg.P "x" * lpeg.C(lpeg.S "0123456789abcdefABCDEF"^-2))/function(c)
    return string.char(tonumber(c,16)) end,
  }

  --- Turn the service fingerprint reply to a probe into a binary blob
  --@param fp the <code>port.version.service_fp</code> provided by the NSE API.
  --@param probe the probe name to match, e.g. GetRequest, TLSSessionReq, etc.
  --@return the raw probe response received to that probe, or nil if there was no response.
  function get_response (fp, probe)
    fp = string.gsub(fp, "\nSF:", "")
    local i, e = string.find(fp, string.format("%s,%%x+,", probe))
    if i == nil then return nil end
    return unescape:match(getquote:match(fp, e+1))
  end

  local svfp_parser = lpeg.P ({
      anywhere("%r(") * lpeg.Cf(lpeg.Ct("") * (lpeg.V "probematch" * lpeg.P(")%r(")^-1)^1, rawset),
      probematch = lpeg.Cg(lpeg.C((lpeg.P(1) - ",")^1) * "," * (lpeg.R("09") + lpeg.R("AF"))^1 * "," * lpeg.Cs(getquote/function(q) return unescape:match(q) end)),
    })
  --- Get the service fingerprint reply to a probe into a binary blob
  --@param fp the <code>port.version.service_fp</code> provided by the NSE API.
  function parse_fp (fp)
    fp = string.gsub(fp, "\nSF:", "")
    return svfp_parser:match(fp)
  end
end

return _ENV