summaryrefslogtreecommitdiffstats
path: root/nselib/formulas.lua
blob: 6f19000fa1f825fcfeeee67942688a121aa32743 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
---
-- Formula functions for various calculations.
--
-- The library lets scripts to use common mathematical functions to compute percentages,
-- averages, entropy, randomness and other calculations. Scripts that generate statistics
-- and metrics can also make use of this library.
--
-- @copyright Same as Nmap--See https://nmap.org/book/man-legal.html
---

local math = require "math"
local stdnse = require "stdnse"
local string = require "string"
local table = require "table"
local unittest = require "unittest"

_ENV = stdnse.module("formulas", stdnse.seeall)

--- Calculate the entropy of a password.
--
-- A random password's information entropy, H, is given by the formula: H = L *
-- (logN) / (log2), where N is the number of possible symbols and L is the
-- number of symbols in the password. Based on
-- https://en.wikipedia.org/wiki/Password_strength
-- @param value The password to check
-- @return The entropy in bits
calcPwdEntropy = function(value)

    local total, hasdigit, haslower, hasupper, hasspaces = 0, 0, 0, 0, false

    if string.find(value, "%d") then
        hasdigit = 1
    end
    if string.find(value, "%l") then
        haslower = 1
    end
    if string.find(value, "%u") then
        hasupper = 1
    end
    if string.find(value, ' ') then
        hasspaces = true
    end

    -- The values 10, 26, 26 have been taken from Wikipedia's entropy table.
    local total = hasdigit * 10 + hasupper * 26 + haslower * 26
    local entropy = math.floor(math.log(total) * #value / math.log(2))

    return entropy
end

-- A chi-square test for the null hypothesis that the members of data are drawn
-- from a uniform distribution over num_cats categories.
local function chi2(data, num_cats)
    local bins = {}
    local x2, delta, expected

    for _, x in ipairs(data) do
        bins[x] = bins[x] or 0
        bins[x] = bins[x] + 1
    end

    expected = #data / num_cats
    x2 = 0.0
    for _, n in pairs(bins) do
        delta = n - expected
        x2 = x2 + delta * delta
    end
    x2 = x2 / expected

    return x2
end

local function c_to_bin (c)
  local n = stdnse.tobinary(c:byte())
  return ("0"):rep(8-#n)..n
end

-- Split a string into a sequence of bit strings of the given length.
-- splitbits("abc", 5) --> {"01100", "00101", "10001", "00110"}
-- Any short final group is omitted.
local function splitbits(s, n)
    local bits = s:gsub(".", c_to_bin)

    local seq = {}
    for i = 1, #bits - n, n do
        seq[#seq + 1] = bits:sub(i, i + n - 1)
    end

    return seq
end

-- chi-square cdf table at 0.95 confidence for different degrees of freedom.
-- >>> import scipy.stats, scipy.optimize
-- >>> scipy.optimize.newton(lambda x: scipy.stats.chi2(dof).cdf(x) - 0.95, dof)
local CHI2_CDF = {
    [3] = 7.8147279032511738,
    [15] = 24.99579013972863,
    [255] = 293.2478350807001,
}

--- Checks whether a sample looks random
--
-- Because our sample is so small (only 16 bytes), do a chi-square
-- goodness of fit test across groups of 2, 4, and 8 bits. If using only
-- 8 bits, for example, any sample whose bytes are all different would
-- pass the test. Using 2 bits will tend to catch things like pure
-- ASCII, where one out of every four samples never has its high bit
-- set.
-- @param data The data to check
-- @return True if the data appears to be random, false otherwise
function looksRandom(data)
    local x2


    x2 = chi2(splitbits(data, 2), 4)
    if x2 > CHI2_CDF[3] then
        return false
    end

    x2 = chi2(splitbits(data, 4), 16)
    if x2 > CHI2_CDF[15] then
        return false
    end

    x2 = chi2({string.byte(data, 1, -1)}, 256)
    if x2 > CHI2_CDF[255] then
        return false
    end

    return true
end

--- Return the mean and sample standard deviation of an array, using the
-- algorithm from Knuth Vol. 2, Section 4.2.2.
--
-- @params t An array-style table of values
-- @return The mean of the values
-- @return The standard deviation of the values
function mean_stddev(t)
  local i, m, s, sigma

  if #t == 0 then
    return nil, nil
  elseif #t == 1 then
    return t[1], 0
  end

  m = t[1]
  s = 0
  for i = 2, #t do
    local mp = m
    m = m + (t[i] - m) / i
    s = s + (t[i] - mp) * (t[i] - m)
  end
  sigma = math.sqrt(s / (#t - 1))

  return m, sigma
end

-- Partition function for quickselect and quicksort
local function partition(t, left, right, pivot)
  local pv = t[pivot]
  t[pivot], t[right] = t[right], t[pivot]
  local storeidx = left
  for i=left, right-1 do
    assert(storeidx < right)
    if t[i] < pv then
      t[storeidx], t[i] = t[i], t[storeidx]
      storeidx = storeidx + 1
    end
  end
  t[storeidx], t[right] = t[right], t[storeidx]
  return storeidx
end

-- Quickselect algorithm
local function _qselect(t, left, right, k)
  if left == right then
    return t[left]
  end
  local pivot = math.random(left, right)
  pivot = partition(t, left, right, pivot)
  if k == pivot then
    return t[k]
  elseif k < pivot then
    return _qselect(t, left, pivot - 1, k)
  else
    return _qselect(t, pivot + 1, right, k)
  end
end

--- Return the k-th largest element in a list
--
-- @param t The list, not sorted
-- @param k The ordinal value to return
-- @return The k-th largest element in the list
function quickselect(t, k)
  local tc = {}
  -- Work on a copy of the table, since we modify in-place
  table.move(t, 1, #t, 1, tc)
  return _qselect(tc, 1, #tc, k)
end

--- Find the median of a list
--
--@param t the table/list of values
--@return the median value
function median(t)
  return quickselect(t, math.ceil(#t/2))
end

if not unittest.testing() then
  return _ENV
end

test_suite = unittest.TestSuite:new()

local table_equal = unittest.table_equal
test_suite:add_test(table_equal(splitbits("abc", 5), {"01100", "00101", "10001", "00110"}), 'splitbits("abc", 5)')
return _ENV