1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
|
---
-- Formula functions for various calculations.
--
-- The library lets scripts use common mathematical functions to compute percentages,
-- averages, entropy, randomness and other calculations. Scripts that generate statistics
-- and metrics can also make use of this library.
--
-- @copyright Same as Nmap--See https://nmap.org/book/man-legal.html
---
local math = require "math"
local stdnse = require "stdnse"
local string = require "string"
local table = require "table"
local unittest = require "unittest"
_ENV = stdnse.module("formulas", stdnse.seeall)
--- Calculate the entropy of a password.
--
-- A random password's information entropy, H, is given by the formula: H = L *
-- (logN) / (log2), where N is the number of possible symbols and L is the
-- number of symbols in the password. Based on
-- https://en.wikipedia.org/wiki/Password_strength
--
-- Only three symbol classes are recognised: digits (10 symbols), lowercase
-- letters (26) and uppercase letters (26). Spaces, punctuation and any other
-- bytes add to the length L but not to the alphabet size N.
-- @param value The password to check
-- @return The entropy in bits, rounded down. 0 is returned when the password
--         contains none of the recognised symbol classes (this includes the
--         empty password).
calcPwdEntropy = function(value)
  -- The values 10, 26, 26 have been taken from Wikipedia's entropy table.
  local total = 0
  if string.find(value, "%d") then
    total = total + 10
  end
  if string.find(value, "%l") then
    total = total + 26
  end
  if string.find(value, "%u") then
    total = total + 26
  end

  -- With an empty alphabet math.log(0) is -inf, which would propagate as -inf
  -- (or NaN for a zero-length password). Report zero bits instead.
  if total == 0 then
    return 0
  end

  return math.floor(math.log(total) * #value / math.log(2))
end
-- A chi-square test for the null hypothesis that the members of data are drawn
-- from a uniform distribution over num_cats categories.
--
-- @param data An array-style table of observed category values
-- @param num_cats The total number of possible categories
-- @return The chi-square statistic, i.e. the sum of
--         (observed - expected)^2 / expected over all num_cats categories,
--         or 0.0 when data is empty.
local function chi2(data, num_cats)
  local total = #data
  if total == 0 then
    -- No observations: expected would be 0 and the statistic 0/0 (NaN).
    return 0.0
  end

  -- Tally how often each category occurs and how many distinct ones were seen.
  local bins = {}
  local occupied = 0
  for _, x in ipairs(data) do
    local count = bins[x]
    if count == nil then
      occupied = occupied + 1
      count = 0
    end
    bins[x] = count + 1
  end

  local expected = total / num_cats
  local x2 = 0.0
  for _, n in pairs(bins) do
    local delta = n - expected
    x2 = x2 + delta * delta
  end

  -- Categories that never occurred have an observed count of 0 and each one
  -- contributes (0 - expected)^2. They have no entry in bins, so account for
  -- them here; leaving them out understates the statistic.
  local empty = num_cats - occupied
  if empty > 0 then
    x2 = x2 + empty * expected * expected
  end

  return x2 / expected
end
-- Convert the first byte of c to the binary string produced by
-- stdnse.tobinary, left-padded with "0" characters to a width of 8.
local function c_to_bin (c)
  local digits = stdnse.tobinary(string.byte(c))
  local padding = string.rep("0", 8 - #digits)
  return padding .. digits
end
-- Split a string into a sequence of bit strings of the given length.
-- splitbits("abc", 5) --> {"01100", "00101", "10001", "00110"}
-- Any short final group is omitted.
--
-- @param s The string to split
-- @param n The number of bits in each group
-- @return An array-style table of n-character bit strings
local function splitbits(s, n)
  -- gsub also returns a match count; assigning to a single name keeps only
  -- the resulting string.
  local bits = s:gsub(".", c_to_bin)

  local seq = {}
  -- The last position at which a full group of n bits can start is
  -- #bits - n + 1. A limit of #bits - n would drop the final full group
  -- whenever #bits is an exact multiple of n.
  for i = 1, #bits - n + 1, n do
    seq[#seq + 1] = bits:sub(i, i + n - 1)
  end

  return seq
end
-- Chi-square critical values at 0.95 confidence, keyed by degrees of freedom:
-- CHI2_CDF[dof] is the point where the chi-square CDF with dof degrees of
-- freedom reaches 0.95. The values were generated with:
-- >>> import scipy.stats, scipy.optimize
-- >>> scipy.optimize.newton(lambda x: scipy.stats.chi2(dof).cdf(x) - 0.95, dof)
-- The keys 3, 15 and 255 are one less than the category counts 4, 16 and 256
-- that looksRandom passes to chi2.
local CHI2_CDF = {
[3] = 7.8147279032511738,
[15] = 24.99579013972863,
[255] = 293.2478350807001,
}
--- Decide whether a sample of bytes is plausibly random.
--
-- The sample is expected to be very small (only 16 bytes), so a chi-square
-- goodness of fit test is run three times: over groups of 2 bits, groups of
-- 4 bits, and whole bytes. Testing whole bytes alone would accept any sample
-- whose bytes are all different; the 2-bit test tends to catch things like
-- pure ASCII, where one out of every four groups never has its high bit set.
-- The tests run in that order and the first failure ends the check.
-- @param data The data to check
-- @return True if the data appears to be random, false otherwise
function looksRandom(data)
  -- True when the chi-square statistic of samples over the given number of
  -- categories is above the tabulated limit for (categories - 1).
  local function exceeds(samples, categories)
    return chi2(samples, categories) > CHI2_CDF[categories - 1]
  end

  if exceeds(splitbits(data, 2), 4) then
    return false
  end
  if exceeds(splitbits(data, 4), 16) then
    return false
  end
  if exceeds({string.byte(data, 1, -1)}, 256) then
    return false
  end
  return true
end
--- Compute the mean and the sample standard deviation of an array in a
-- single pass, using the running-update algorithm from Knuth Vol. 2,
-- Section 4.2.2.
--
-- @param t An array-style table of values
-- @return The mean of the values (nil for an empty table)
-- @return The standard deviation of the values (nil for an empty table, 0 for
--         a single value)
function mean_stddev(t)
  local count = #t
  if count == 0 then
    return nil, nil
  end
  if count == 1 then
    return t[1], 0
  end

  -- mean is the running mean of the first idx values; sum_sq accumulates the
  -- sum of squared deviations from that running mean.
  local mean, sum_sq = t[1], 0
  for idx = 2, count do
    local value = t[idx]
    local prev_mean = mean
    mean = prev_mean + (value - prev_mean) / idx
    sum_sq = sum_sq + (value - prev_mean) * (value - mean)
  end

  -- Dividing by count - 1 gives the sample (not population) deviation.
  return mean, math.sqrt(sum_sq / (count - 1))
end
-- Partition step shared by quickselect and quicksort.
--
-- Rearranges t[left..right] in place around the value found at index pivot:
-- every element that compares < the pivot value ends up before it, all other
-- elements after it.
-- @param t The array-style table to rearrange
-- @param left First index of the range
-- @param right Last index of the range
-- @param pivot Index (within the range) of the pivot element
-- @return The index at which the pivot value ends up
local function partition(t, left, right, pivot)
  local pivot_value = t[pivot]
  -- Park the pivot at the right edge while the rest of the range is scanned.
  t[right], t[pivot] = t[pivot], t[right]

  local boundary = left
  local scan = left
  while scan < right do
    assert(boundary < right)
    if t[scan] < pivot_value then
      t[scan], t[boundary] = t[boundary], t[scan]
      boundary = boundary + 1
    end
    scan = scan + 1
  end

  -- Drop the pivot into the slot that separates the two groups.
  t[right], t[boundary] = t[boundary], t[right]
  return boundary
end
-- Quickselect algorithm.
--
-- Repeatedly partitions t[left..right] around a randomly chosen pivot and
-- narrows the range to the side that contains index k, until the pivot lands
-- on k or the range has shrunk to a single element. t is modified in place.
-- @param t The array-style table to search
-- @param left First index of the range to search
-- @param right Last index of the range to search
-- @param k The index whose final (ordered) element is wanted
-- @return The element that belongs at index k
local function _qselect(t, left, right, k)
  while left ~= right do
    local pivot = partition(t, left, right, math.random(left, right))
    if k == pivot then
      return t[k]
    end
    if k < pivot then
      right = pivot - 1
    else
      left = pivot + 1
    end
  end
  return t[left]
end
--- Return the k-th smallest element in a list
--
-- Elements are ranked in ascending order using the < operator, so k = 1
-- yields the minimum and k = #t the maximum. The input table itself is left
-- untouched; the selection runs on a copy.
--
-- @param t The list, not sorted
-- @param k The ordinal value to return, from 1 to #t
-- @return The k-th smallest element in the list, or nil if the list is empty
--         or k lies outside the range 1..#t
function quickselect(t, k)
  local n = #t
  -- Reject ranks that cannot exist. Without this check the search narrows to
  -- an empty interval and math.random raises an "interval is empty" error.
  if k < 1 or k > n then
    return nil
  end

  local tc = {}
  -- Work on a copy of the table, since we modify in-place
  table.move(t, 1, n, 1, tc)
  return _qselect(tc, 1, n, k)
end
--- Find the median of a list
--
-- The element at rank ceil(#t / 2) is selected with quickselect, so for a
-- list with an even number of values the lower of the two middle values is
-- returned rather than their average.
--@param t the table/list of values
--@return the median value
function median(t)
  local middle = math.ceil(#t / 2)
  return quickselect(t, middle)
end
-- When unittest.testing() is false the module table is returned right here,
-- so nothing below this point runs.
if not unittest.testing() then
return _ENV
end
-- Unit tests for this module, collected in test_suite.
test_suite = unittest.TestSuite:new()
local table_equal = unittest.table_equal
-- "abc" is 24 bits: four full 5-bit groups, the trailing 4 bits are dropped.
test_suite:add_test(table_equal(splitbits("abc", 5), {"01100", "00101", "10001", "00110"}), 'splitbits("abc", 5)')
return _ENV
|