summaryrefslogtreecommitdiffstats
path: root/lualib/lua_lexer.lua
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
commit133a45c109da5310add55824db21af5239951f93 (patch)
treeba6ac4c0a950a0dda56451944315d66409923918 /lualib/lua_lexer.lua
parentInitial commit. (diff)
downloadrspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lualib/lua_lexer.lua')
-rw-r--r--lualib/lua_lexer.lua163
1 files changed, 163 insertions, 0 deletions
diff --git a/lualib/lua_lexer.lua b/lualib/lua_lexer.lua
new file mode 100644
index 0000000..54bbd7c
--- /dev/null
+++ b/lualib/lua_lexer.lua
@@ -0,0 +1,163 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[ Lua LPEG grammar based on https://github.com/xolox/lua-lxsh/ ]]
+
+
+local lpeg = require "lpeg"
+
+local P = lpeg.P
+local R = lpeg.R
+local S = lpeg.S
+local D = R '09' -- Digits
+local I = R('AZ', 'az', '\127\255') + '_' -- Identifiers
+local B = -(I + D) -- Word boundary
+local EOS = -lpeg.P(1) -- end of string
+
+-- Pattern for long strings and long comments.
+local longstring = #(P '[[' + (P '[' * P '=' ^ 0 * '[')) * P(function(input, index)
+ local level = input:match('^%[(=*)%[', index)
+ if level then
+ local _, last = input:find(']' .. level .. ']', index, true)
+ if last then
+ return last + 1
+ end
+ end
+end)
+
+-- String literals.
+local singlequoted = P "'" * ((1 - S "'\r\n\f\\") + (P '\\' * 1)) ^ 0 * "'"
+local doublequoted = P '"' * ((1 - S '"\r\n\f\\') + (P '\\' * 1)) ^ 0 * '"'
+
+-- Comments.
+local eol = P '\r\n' + '\n'
+local line = (1 - S '\r\n\f') ^ 0 * eol ^ -1
+local singleline = P '--' * line
+local multiline = P '--' * longstring
+
+-- Numbers.
+local sign = S '+-' ^ -1
+local decimal = D ^ 1
+local hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
+local float = D ^ 1 * P '.' * D ^ 0 + P '.' * D ^ 1
+local maybeexp = (float + decimal) * (S 'eE' * sign * D ^ 1) ^ -1
+
+local function compile_keywords(keywords)
+ local list = {}
+ for word in keywords:gmatch('%S+') do
+ list[#list + 1] = word
+ end
+ -- Sort by length
+ table.sort(list, function(a, b)
+ return #a > #b
+ end)
+
+ local pattern
+ for _, word in ipairs(list) do
+ local p = lpeg.P(word)
+ pattern = pattern and (pattern + p) or p
+ end
+
+ local AB = B + EOS -- ending boundary
+ return pattern * AB
+end
+
+-- Identifiers
+local ident = I * (I + D) ^ 0
+local expr = ('.' * ident) ^ 0
+
+local patterns = {
+ { 'whitespace', S '\r\n\f\t\v ' ^ 1 },
+ { 'constant', (P 'true' + 'false' + 'nil') * B },
+ { 'string', singlequoted + doublequoted + longstring },
+ { 'comment', multiline + singleline },
+ { 'number', hexadecimal + maybeexp },
+ { 'operator', P 'not' + '...' + 'and' + '..' + '~=' + '==' + '>=' + '<='
+ + 'or' + S ']{=>^[<;)*(%}+-:,/.#' },
+ { 'keyword', compile_keywords([[
+ break do else elseif end for function if in local repeat return then until while
+ ]]) },
+ { 'identifier', lpeg.Cmt(ident,
+ function(input, index)
+ return expr:match(input, index)
+ end)
+ },
+ { 'error', 1 },
+}
+
+local compiled
+
+local function compile_patterns()
+ if not compiled then
+ local function process(elt)
+ local n, grammar = elt[1], elt[2]
+ return lpeg.Cc(n) * lpeg.P(grammar) * lpeg.Cp()
+ end
+ local any = process(patterns[1])
+ for i = 2, #patterns do
+ any = any + process(patterns[i])
+ end
+ compiled = any
+ end
+
+ return compiled
+end
+
+local function sync(token, lnum, cnum)
+ local lastidx
+ lnum, cnum = lnum or 1, cnum or 1
+ if token:find '\n' then
+ for i in token:gmatch '()\n' do
+ lnum = lnum + 1
+ lastidx = i
+ end
+ cnum = #token - lastidx + 1
+ else
+ cnum = cnum + #token
+ end
+ return lnum, cnum
+end
+
+local exports = {}
+
+exports.gmatch = function(input)
+ local parser = compile_patterns()
+ local index, lnum, cnum = 1, 1, 1
+
+ return function()
+ local kind, after = parser:match(input, index)
+ if kind and after then
+ local text = input:sub(index, after - 1)
+ local oldlnum, oldcnum = lnum, cnum
+ index = after
+ lnum, cnum = sync(text, lnum, cnum)
+ return kind, text, oldlnum, oldcnum
+ end
+ end
+end
+
+exports.lex_to_table = function(input)
+ local out = {}
+
+ for kind, text, lnum, cnum in exports.gmatch(input) do
+ out[#out + 1] = { kind, text, lnum, cnum }
+ end
+
+ return out
+end
+
+return exports
+