diff options
Diffstat (limited to 'nselib/formulas.lua')
-rw-r--r-- | nselib/formulas.lua | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/nselib/formulas.lua b/nselib/formulas.lua new file mode 100644 index 0000000..6f19000 --- /dev/null +++ b/nselib/formulas.lua @@ -0,0 +1,220 @@ +--- +-- Formula functions for various calculations. +-- +-- The library lets scripts to use common mathematical functions to compute percentages, +-- averages, entropy, randomness and other calculations. Scripts that generate statistics +-- and metrics can also make use of this library. +-- +-- @copyright Same as Nmap--See https://nmap.org/book/man-legal.html +--- + +local math = require "math" +local stdnse = require "stdnse" +local string = require "string" +local table = require "table" +local unittest = require "unittest" + +_ENV = stdnse.module("formulas", stdnse.seeall) + +--- Calculate the entropy of a password. +-- +-- A random password's information entropy, H, is given by the formula: H = L * +-- (logN) / (log2), where N is the number of possible symbols and L is the +-- number of symbols in the password. Based on +-- https://en.wikipedia.org/wiki/Password_strength +-- @param value The password to check +-- @return The entropy in bits +calcPwdEntropy = function(value) + + local total, hasdigit, haslower, hasupper, hasspaces = 0, 0, 0, 0, false + + if string.find(value, "%d") then + hasdigit = 1 + end + if string.find(value, "%l") then + haslower = 1 + end + if string.find(value, "%u") then + hasupper = 1 + end + if string.find(value, ' ') then + hasspaces = true + end + + -- The values 10, 26, 26 have been taken from Wikipedia's entropy table. + local total = hasdigit * 10 + hasupper * 26 + haslower * 26 + local entropy = math.floor(math.log(total) * #value / math.log(2)) + + return entropy +end + +-- A chi-square test for the null hypothesis that the members of data are drawn +-- from a uniform distribution over num_cats categories. +local function chi2(data, num_cats) + local bins = {} + local x2, delta, expected + + for _, x in ipairs(data) do + bins[x] = bins[x] or 0 + bins[x] = bins[x] + 1 + end + + expected = #data / num_cats + x2 = 0.0 + for _, n in pairs(bins) do + delta = n - expected + x2 = x2 + delta * delta + end + x2 = x2 / expected + + return x2 +end + +local function c_to_bin (c) + local n = stdnse.tobinary(c:byte()) + return ("0"):rep(8-#n)..n +end + +-- Split a string into a sequence of bit strings of the given length. +-- splitbits("abc", 5) --> {"01100", "00101", "10001", "00110"} +-- Any short final group is omitted. +local function splitbits(s, n) + local bits = s:gsub(".", c_to_bin) + + local seq = {} + for i = 1, #bits - n, n do + seq[#seq + 1] = bits:sub(i, i + n - 1) + end + + return seq +end + +-- chi-square cdf table at 0.95 confidence for different degrees of freedom. +-- >>> import scipy.stats, scipy.optimize +-- >>> scipy.optimize.newton(lambda x: scipy.stats.chi2(dof).cdf(x) - 0.95, dof) +local CHI2_CDF = { + [3] = 7.8147279032511738, + [15] = 24.99579013972863, + [255] = 293.2478350807001, +} + +--- Checks whether a sample looks random +-- +-- Because our sample is so small (only 16 bytes), do a chi-square +-- goodness of fit test across groups of 2, 4, and 8 bits. If using only +-- 8 bits, for example, any sample whose bytes are all different would +-- pass the test. Using 2 bits will tend to catch things like pure +-- ASCII, where one out of every four samples never has its high bit +-- set. +-- @param data The data to check +-- @return True if the data appears to be random, false otherwise +function looksRandom(data) + local x2 + + + x2 = chi2(splitbits(data, 2), 4) + if x2 > CHI2_CDF[3] then + return false + end + + x2 = chi2(splitbits(data, 4), 16) + if x2 > CHI2_CDF[15] then + return false + end + + x2 = chi2({string.byte(data, 1, -1)}, 256) + if x2 > CHI2_CDF[255] then + return false + end + + return true +end + +--- Return the mean and sample standard deviation of an array, using the +-- algorithm from Knuth Vol. 2, Section 4.2.2. +-- +-- @params t An array-style table of values +-- @return The mean of the values +-- @return The standard deviation of the values +function mean_stddev(t) + local i, m, s, sigma + + if #t == 0 then + return nil, nil + elseif #t == 1 then + return t[1], 0 + end + + m = t[1] + s = 0 + for i = 2, #t do + local mp = m + m = m + (t[i] - m) / i + s = s + (t[i] - mp) * (t[i] - m) + end + sigma = math.sqrt(s / (#t - 1)) + + return m, sigma +end + +-- Partition function for quickselect and quicksort +local function partition(t, left, right, pivot) + local pv = t[pivot] + t[pivot], t[right] = t[right], t[pivot] + local storeidx = left + for i=left, right-1 do + assert(storeidx < right) + if t[i] < pv then + t[storeidx], t[i] = t[i], t[storeidx] + storeidx = storeidx + 1 + end + end + t[storeidx], t[right] = t[right], t[storeidx] + return storeidx +end + +-- Quickselect algorithm +local function _qselect(t, left, right, k) + if left == right then + return t[left] + end + local pivot = math.random(left, right) + pivot = partition(t, left, right, pivot) + if k == pivot then + return t[k] + elseif k < pivot then + return _qselect(t, left, pivot - 1, k) + else + return _qselect(t, pivot + 1, right, k) + end +end + +--- Return the k-th largest element in a list +-- +-- @param t The list, not sorted +-- @param k The ordinal value to return +-- @return The k-th largest element in the list +function quickselect(t, k) + local tc = {} + -- Work on a copy of the table, since we modify in-place + table.move(t, 1, #t, 1, tc) + return _qselect(tc, 1, #tc, k) +end + +--- Find the median of a list +-- +--@param t the table/list of values +--@return the median value +function median(t) + return quickselect(t, math.ceil(#t/2)) +end + +if not unittest.testing() then + return _ENV +end + +test_suite = unittest.TestSuite:new() + +local table_equal = unittest.table_equal +test_suite:add_test(table_equal(splitbits("abc", 5), {"01100", "00101", "10001", "00110"}), 'splitbits("abc", 5)') +return _ENV |