From 133a45c109da5310add55824db21af5239951f93 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 10 Apr 2024 23:30:40 +0200 Subject: Adding upstream version 3.8.1. Signed-off-by: Daniel Baumann --- src/lua/lua_text.c | 1789 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1789 insertions(+) create mode 100644 src/lua/lua_text.c (limited to 'src/lua/lua_text.c') diff --git a/src/lua/lua_text.c b/src/lua/lua_text.c new file mode 100644 index 0000000..26a5c08 --- /dev/null +++ b/src/lua/lua_text.c @@ -0,0 +1,1789 @@ +/*- + * Copyright 2019 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lua_common.h" +#include "libcryptobox/cryptobox.h" +#include "contrib/fastutf8/fastutf8.h" +#include "unix-std.h" + +/*** + * @module rspamd_text + * This module provides access to opaque text structures used widely to prevent + * copying between Lua and C for various concerns: performance, security etc... + * + * You can convert rspamd_text into string but it will copy data. + */ + +/*** + * @function rspamd_text.fromstring(str) + * Creates rspamd_text from Lua string (copied to the text) + * @param {string} str string to use + * @return {rspamd_text} resulting text + */ +LUA_FUNCTION_DEF(text, fromstring); + +/*** + * @function rspamd_text.null() + * Creates rspamd_text with NULL pointer for testing purposes + * @param {string} str string to use + * @return {rspamd_text} resulting text + */ +LUA_FUNCTION_DEF(text, null); +/*** + * @function rspamd_text.randombytes(nbytes) + * Creates rspamd_text with random bytes inside (raw bytes) + * @param {number} nbytes number of random bytes generated + * @return {rspamd_text} random bytes text + */ +LUA_FUNCTION_DEF(text, randombytes); + +/*** + * @function rspamd_text.fromtable(tbl[, delim]) + * Same as `table.concat` but generates rspamd_text instead of the Lua string + * @param {table} tbl table to use + * @param {string} delim optional delimiter + * @return {rspamd_text} resulting text + */ +LUA_FUNCTION_DEF(text, fromtable); +/*** + * @method rspamd_text:byte(pos[, pos2]) + * Returns a byte at the position `pos` or bytes from `pos` to `pos2` if specified + * @param {integer} pos index + * @param {integer} pos2 index + * @return {integer} byte at the position `pos` or varargs of bytes + */ +LUA_FUNCTION_DEF(text, byte); +/*** + * @method rspamd_text:len() + * Returns length of a string + * @return {number} length of string in **bytes** + */ +LUA_FUNCTION_DEF(text, len); +/*** + * @method rspamd_text:str() + * Converts text to string by copying its content + * @return {string} copy of text as Lua string + */ +LUA_FUNCTION_DEF(text, str); +/*** + * @method rspamd_text:ptr() + * Converts text to lightuserdata + * @return {lightuserdata} pointer value of rspamd_text + */ +LUA_FUNCTION_DEF(text, ptr); +/*** + * @method rspamd_text:save_in_file(fname[, mode]) + * Saves text in file + * @return {boolean} true if save has been completed + */ +LUA_FUNCTION_DEF(text, save_in_file); +/*** + * @method rspamd_text:span(start[, len]) + * Returns a span for lua_text starting at pos [start] (1 indexed) and with + * length `len` (or to the end of the text) + * @param {integer} start start index + * @param {integer} len length of span + * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...) + */ +LUA_FUNCTION_DEF(text, span); +/*** + * @method rspamd_text:sub(start[, len]) + * Returns a substring for lua_text similar to string.sub from Lua + * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...) + */ +LUA_FUNCTION_DEF(text, sub); +/*** + * @method rspamd_text:lines([stringify]) + * Returns an iter over all lines as rspamd_text objects or as strings if `stringify` is true + * @param {boolean} stringify stringify lines + * @return {iterator} iterator triplet + */ +LUA_FUNCTION_DEF(text, lines); +/*** + * @method rspamd_text:split(regexp, [stringify]) + * Returns an iter over all encounters of the specific regexp as rspamd_text objects or as strings if `stringify` is true + * @param {rspamd_regexp} regexp regexp (pcre syntax) used for splitting + * @param {boolean} stringify stringify lines + * @return {iterator} iterator triplet + */ +LUA_FUNCTION_DEF(text, split); +/*** + * @method rspamd_text:at(pos) + * Returns a byte at the position `pos` + * @param {integer} pos index + * @return {integer} byte at the position `pos` or nil if pos out of bound + */ +LUA_FUNCTION_DEF(text, at); +/*** + * @method rspamd_text:memchr(chr, [reverse]) + * Returns the first or the last position of the character `chr` in the text or + * -1 in case if a character has not been found. Indexes start from `1` + * @param {string/number} chr character or a character code to find + * @param {boolean} reverse last character if `true` + * @return {integer} position of the character or `-1` + */ +LUA_FUNCTION_DEF(text, memchr); +/*** + * @method rspamd_text:bytes() + * Converts text to an array of bytes + * @return {table|integer} bytes in the array (as unsigned char) + */ +LUA_FUNCTION_DEF(text, bytes); +/*** + * @method rspamd_text:lower([is_utf, [inplace]]) + * Return a new text with lowercased characters, if is_utf is true then Rspamd applies utf8 lowercase + * @param {boolean} is_utf apply utf8 lowercase + * @param {boolean} inplace lowercase the original text + * @return {rspamd_text} new rspamd_text (or the original text if inplace) with lowercased letters + */ +LUA_FUNCTION_DEF(text, lower); +LUA_FUNCTION_DEF(text, take_ownership); +/*** + * @method rspamd_text:exclude_chars(set_to_exclude, [always_copy]) + * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned) + * where all chars from `set_to_exclude` are removed + * Patterns supported: + * + * - %s - all space characters + * - %n - all newline characters + * - %c - all control characters (it includes 8bit characters and spaces) + * - %8 - all 8 bit characters + * - %% - just a percent character + * + * @param {string} set_to_exclude characters to exclude + * @param {boolean} always_copy always copy the source text + * @return {rspamd_text} modified or copied text + */ +LUA_FUNCTION_DEF(text, exclude_chars); +/*** + * @method rspamd_text:oneline([always_copy]) + * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned) + * where the following transformations are made: + * - All spaces sequences are replaced with a single space + * - All newlines sequences are replaced with a single space + * - Trailing and leading spaces are removed + * - Control characters are excluded + * - UTF8 sequences are normalised + * + * @param {boolean} always_copy always copy the source text + * @return {rspamd_text} modified or copied text + */ +LUA_FUNCTION_DEF(text, oneline); +/*** + * @method rspamd_text:base32([b32type]) + * Returns a text encoded in base32 (new rspamd_text is allocated) + * + * @param {string} b32type base32 type (default, bleach, rfc) + * @return {rspamd_text} new text encoded in base32 + */ +LUA_FUNCTION_DEF(text, base32); +/*** + * @method rspamd_text:base64([line_length, [nline, [fold]]]) + * Returns a text encoded in base64 (new rspamd_text is allocated) + * + * @param {number} line_length return text split with newlines up to this attribute + * @param {string} nline newline type: `cr`, `lf`, `crlf` + * @param {boolean} fold use folding when splitting into lines (false by default) + * @return {rspamd_text} new text encoded in base64 + */ +LUA_FUNCTION_DEF(text, base64); +/*** + * @method rspamd_text:hex() + * Returns a text encoded in hex (new rspamd_text is allocated) + * + * @return {rspamd_text} new text encoded in hex + */ +LUA_FUNCTION_DEF(text, hex); +/*** + * @method rspamd_text:find(pattern [, init]) + * Looks for the first match of pattern in the string s. + * If it finds a match, then find returns the indices of s where this occurrence + * starts and ends; otherwise, it returns nil. A third, + * optional numerical argument init specifies where to start the search; + * its default value is 1 and can be negative. + * This method currently supports merely a plain search, no patterns. + * + * @param {string} pattern pattern to find + * @param {number} init specifies where to start the search (1 default) + * @return {number,number/nil} If it finds a match, then find returns the indices of s where this occurrence starts and ends; otherwise, it returns nil + */ +LUA_FUNCTION_DEF(text, find); +LUA_FUNCTION_DEF(text, gc); +LUA_FUNCTION_DEF(text, eq); +LUA_FUNCTION_DEF(text, lt); +LUA_FUNCTION_DEF(text, concat); +LUA_FUNCTION_DEF(text, strtoul); + +static const struct luaL_reg textlib_f[] = { + LUA_INTERFACE_DEF(text, fromstring), + {"from_string", lua_text_fromstring}, + LUA_INTERFACE_DEF(text, fromtable), + {"from_table", lua_text_fromtable}, + LUA_INTERFACE_DEF(text, null), + LUA_INTERFACE_DEF(text, randombytes), + {NULL, NULL}}; + +static const struct luaL_reg textlib_m[] = { + LUA_INTERFACE_DEF(text, len), + LUA_INTERFACE_DEF(text, str), + LUA_INTERFACE_DEF(text, ptr), + LUA_INTERFACE_DEF(text, take_ownership), + LUA_INTERFACE_DEF(text, save_in_file), + LUA_INTERFACE_DEF(text, span), + LUA_INTERFACE_DEF(text, sub), + LUA_INTERFACE_DEF(text, lines), + LUA_INTERFACE_DEF(text, split), + LUA_INTERFACE_DEF(text, at), + LUA_INTERFACE_DEF(text, memchr), + LUA_INTERFACE_DEF(text, byte), + LUA_INTERFACE_DEF(text, bytes), + LUA_INTERFACE_DEF(text, lower), + LUA_INTERFACE_DEF(text, exclude_chars), + LUA_INTERFACE_DEF(text, oneline), + LUA_INTERFACE_DEF(text, base32), + LUA_INTERFACE_DEF(text, base64), + LUA_INTERFACE_DEF(text, hex), + LUA_INTERFACE_DEF(text, find), + LUA_INTERFACE_DEF(text, strtoul), + {"write", lua_text_save_in_file}, + {"__len", lua_text_len}, + {"__tostring", lua_text_str}, + {"__gc", lua_text_gc}, + {"__eq", lua_text_eq}, + {"__lt", lua_text_lt}, + {"__concat", lua_text_concat}, + {NULL, NULL}}; + +struct rspamd_lua_text * +lua_check_text(lua_State *L, gint pos) +{ + void *ud = rspamd_lua_check_udata(L, pos, "rspamd{text}"); + luaL_argcheck(L, ud != NULL, pos, "'text' expected"); + return ud ? (struct rspamd_lua_text *) ud : NULL; +} + +struct rspamd_lua_text * +lua_check_text_or_string(lua_State *L, gint pos) +{ + gint pos_type = lua_type(L, pos); + + if (pos_type == LUA_TUSERDATA) { + void *ud = rspamd_lua_check_udata(L, pos, "rspamd{text}"); + luaL_argcheck(L, ud != NULL, pos, "'text' expected"); + return ud ? (struct rspamd_lua_text *) ud : NULL; + } + else if (pos_type == LUA_TSTRING) { + /* + * Fake static lua_text, we allow to use this function multiple times + * by having a small array of static structures. + */ + static unsigned cur_txt_idx = 0; + static struct rspamd_lua_text fake_text[4]; + gsize len; + int sel_idx; + + sel_idx = cur_txt_idx++ % G_N_ELEMENTS(fake_text); + fake_text[sel_idx].start = lua_tolstring(L, pos, &len); + + if (len >= G_MAXUINT) { + return NULL; + } + + fake_text[sel_idx].len = len; + fake_text[sel_idx].flags = RSPAMD_TEXT_FLAG_FAKE; + + return &fake_text[sel_idx]; + } + + return NULL; +} + +struct rspamd_lua_text * +lua_new_text(lua_State *L, const gchar *start, gsize len, gboolean own) +{ + struct rspamd_lua_text *t; + + t = lua_newuserdata(L, sizeof(*t)); + t->flags = 0; + + if (own) { + gchar *storage; + + if (len > 0) { + storage = g_malloc(len); + + if (start != NULL) { + memcpy(storage, start, len); + } + + t->start = storage; + t->flags = RSPAMD_TEXT_FLAG_OWN; + } + else { + t->start = ""; + } + } + else { + t->start = start; + } + + t->len = len; + rspamd_lua_setclass(L, "rspamd{text}", -1); + + return t; +} + +struct rspamd_lua_text * +lua_new_text_task(lua_State *L, struct rspamd_task *task, + const gchar *start, gsize len, gboolean own) +{ + struct rspamd_lua_text *t; + + t = lua_newuserdata(L, sizeof(*t)); + t->flags = 0; + + if (own) { + gchar *storage; + + if (len > 0) { + storage = rspamd_mempool_alloc(task->task_pool, len); + + if (start != NULL) { + memcpy(storage, start, len); + } + + t->start = storage; + } + else { + t->start = ""; + } + } + else { + t->start = start; + } + + t->len = len; + rspamd_lua_setclass(L, "rspamd{text}", -1); + + return t; +} + +bool lua_is_text_binary(struct rspamd_lua_text *t) +{ + if (t == NULL || t->len == 0) { + return false; + } + + if (rspamd_str_has_8bit(t->start, t->len)) { + if (rspamd_fast_utf8_validate(t->start, t->len) == 0) { + return false; + } + return true; + } + + return false; +} + + +static gint +lua_text_fromstring(lua_State *L) +{ + LUA_TRACE_POINT; + const gchar *str; + gsize l = 0; + gboolean transparent = FALSE; + + str = luaL_checklstring(L, 1, &l); + + if (str) { + if (lua_isboolean(L, 2)) { + transparent = lua_toboolean(L, 2); + } + + lua_new_text(L, str, l, !transparent); + } + else { + return luaL_error(L, "invalid arguments"); + } + + + return 1; +} + +static gint +lua_text_null(lua_State *L) +{ + LUA_TRACE_POINT; + + lua_new_text(L, NULL, 0, false); + + return 1; +} + +static gint +lua_text_randombytes(lua_State *L) +{ + LUA_TRACE_POINT; + guint nbytes = luaL_checkinteger(L, 1); + struct rspamd_lua_text *out; + + out = lua_new_text(L, NULL, nbytes, TRUE); + randombytes_buf((char *) out->start, nbytes); + out->len = nbytes; + + return 1; +} + +#define MAX_REC 10 + +static void +lua_text_tbl_length(lua_State *L, gsize dlen, gsize *dest, guint rec) +{ + gsize tblen, stlen; + struct rspamd_lua_text *elt; + + if (rec > MAX_REC) { + luaL_error(L, "lua_text_tbl_length: recursion limit exceeded"); + + return; + } + + tblen = rspamd_lua_table_size(L, -1); + + for (gsize i = 0; i < tblen; i++) { + lua_rawgeti(L, -1, i + 1); + + if (lua_type(L, -1) == LUA_TSTRING) { +#if LUA_VERSION_NUM >= 502 + stlen = lua_rawlen(L, -1); +#else + stlen = lua_objlen(L, -1); +#endif + (*dest) += stlen; + } + else if (lua_type(L, -1) == LUA_TUSERDATA) { + elt = (struct rspamd_lua_text *) lua_touserdata(L, -1); + + if (elt) { + (*dest) += elt->len; + } + } + else if (lua_type(L, -1) == LUA_TTABLE) { + lua_text_tbl_length(L, dlen, dest, rec + 1); + } + + if (i != tblen - 1) { + (*dest) += dlen; + } + + lua_pop(L, 1); + } +} + +static void +lua_text_tbl_append(lua_State *L, + const gchar *delim, + gsize dlen, + gchar **dest, + guint rec) +{ + const gchar *st; + gsize tblen, stlen; + struct rspamd_lua_text *elt; + + if (rec > MAX_REC) { + luaL_error(L, "lua_text_tbl_length: recursion limit exceeded"); + + return; + } + + tblen = rspamd_lua_table_size(L, -1); + + for (guint i = 0; i < tblen; i++) { + lua_rawgeti(L, -1, i + 1); + + if (lua_type(L, -1) == LUA_TSTRING) { + st = lua_tolstring(L, -1, &stlen); + memcpy((*dest), st, stlen); + (*dest) += stlen; + } + else if (lua_type(L, -1) == LUA_TUSERDATA) { + elt = (struct rspamd_lua_text *) lua_touserdata(L, -1); + + if (elt) { + memcpy((*dest), elt->start, elt->len); + (*dest) += elt->len; + } + } + else if (lua_type(L, -1) == LUA_TTABLE) { + lua_text_tbl_append(L, delim, dlen, dest, rec + 1); + } + + if (dlen && i != tblen - 1) { + memcpy((*dest), delim, dlen); + (*dest) += dlen; + } + + lua_pop(L, 1); + } +} + +static gint +lua_text_fromtable(lua_State *L) +{ + LUA_TRACE_POINT; + const gchar *delim = ""; + struct rspamd_lua_text *t; + gsize textlen = 0, dlen, oldtop = lua_gettop(L); + gchar *dest; + + if (!lua_istable(L, 1)) { + return luaL_error(L, "invalid arguments"); + } + + if (lua_type(L, 2) == LUA_TSTRING) { + delim = lua_tolstring(L, 2, &dlen); + } + else { + dlen = 0; + } + + /* Calculate length needed */ + lua_pushvalue(L, 1); + lua_text_tbl_length(L, dlen, &textlen, 0); + lua_pop(L, 1); + + /* Allocate new text */ + t = lua_newuserdata(L, sizeof(*t)); + dest = g_malloc(textlen); + t->start = dest; + t->len = textlen; + t->flags = RSPAMD_TEXT_FLAG_OWN; + rspamd_lua_setclass(L, "rspamd{text}", -1); + + lua_pushvalue(L, 1); + lua_text_tbl_append(L, delim, dlen, &dest, 0); + lua_pop(L, 1); /* Table arg */ + + gint newtop = lua_gettop(L); + g_assert(newtop == oldtop + 1); + + return 1; +} + +static gint +lua_text_len(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + gsize l = 0; + + if (t != NULL) { + l = t->len; + } + else { + return luaL_error(L, "invalid arguments"); + } + + lua_pushinteger(L, l); + + return 1; +} + +static gint +lua_text_str(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + + if (t != NULL) { + lua_pushlstring(L, t->start, t->len); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_ptr(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + + if (t != NULL) { + lua_pushlightuserdata(L, (gpointer) t->start); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_take_ownership(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + gchar *dest; + + if (t != NULL) { + if (t->flags & RSPAMD_TEXT_FLAG_OWN) { + /* We already own it */ + lua_pushboolean(L, true); + } + else { + dest = g_malloc(t->len); + memcpy(dest, t->start, t->len); + t->start = dest; + t->flags |= RSPAMD_TEXT_FLAG_OWN; + lua_pushboolean(L, true); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_span(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + gint64 start = lua_tointeger(L, 2), len = -1; + + if (t && start >= 1 && start <= t->len) { + if (lua_isnumber(L, 3)) { + len = lua_tonumber(L, 3); + } + + if (len == -1) { + len = t->len - (start - 1); + } + + if (len < 0 || (len > (t->len - (start - 1)))) { + return luaL_error(L, "invalid length"); + } + + lua_new_text(L, t->start + (start - 1), len, FALSE); + } + else { + if (!t) { + return luaL_error(L, "invalid arguments, text required"); + } + else { + return luaL_error(L, "invalid arguments: start offset %d " + "is larger than text len %d", + (int) start, (int) t->len); + } + } + + return 1; +} + +/* Helpers to behave exactly as Lua does */ +static inline gsize +relative_pos_start(gint pos, gsize len) +{ + if (pos > 0) { + return pos; + } + else if (pos == 0) { + return 1; + } + else if (pos < -((gint) len)) { + return 1; + } + + /* Negative pos inside str */ + return len + ((gsize) pos) + 1; +} + +static inline gsize +relative_pos_end(gint pos, gsize len) +{ + if (pos > (gint) len) { + return len; + } + else if (pos >= 0) { + return (size_t) pos; + } + else if (pos < -((gint) len)) { + return 0; + } + + return len + ((gsize) pos) + 1; +} + +static gint +lua_text_sub(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + + if (t) { + size_t start = relative_pos_start(luaL_checkinteger(L, 2), + t->len); + size_t end = relative_pos_end(luaL_optinteger(L, 3, -1), + t->len); + + + if (start <= end) { + lua_new_text(L, t->start + (start - 1), + (end - start) + 1, FALSE); + } + else { + lua_new_text(L, "", 0, TRUE); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint64 +rspamd_lua_text_push_line(lua_State *L, + struct rspamd_lua_text *t, + gint64 start_offset, + const gchar *sep_pos, + gboolean stringify) +{ + const gchar *start; + gsize len; + gint64 ret; + + start = t->start + start_offset; + len = sep_pos ? (sep_pos - start) : (t->len - start_offset); + ret = start_offset + len; + + /* Trim line */ + while (len > 0) { + if (start[len - 1] == '\r' || start[len - 1] == '\n') { + len--; + } + else { + break; + } + } + + if (stringify) { + lua_pushlstring(L, start, len); + } + else { + struct rspamd_lua_text *ntext; + + ntext = lua_newuserdata(L, sizeof(*ntext)); + rspamd_lua_setclass(L, "rspamd{text}", -1); + ntext->start = start; + ntext->len = len; + ntext->flags = 0; /* Not own as it must be owned by a top object */ + } + + return ret; +} + +static gint +rspamd_lua_text_readline(lua_State *L) +{ + struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1)); + gboolean stringify = lua_toboolean(L, lua_upvalueindex(2)); + gint64 pos = lua_tointeger(L, lua_upvalueindex(3)); + + if (pos < 0) { + return luaL_error(L, "invalid pos: %d", (gint) pos); + } + + if (pos >= t->len) { + /* We are done */ + return 0; + } + + const gchar *sep_pos; + + /* We look just for `\n` ignoring `\r` as it is very rare nowadays */ + sep_pos = memchr(t->start + pos, '\n', t->len - pos); + + if (sep_pos == NULL) { + /* Either last `\n` or `\r` separated text */ + sep_pos = memchr(t->start + pos, '\r', t->len - pos); + } + + pos = rspamd_lua_text_push_line(L, t, pos, sep_pos, stringify); + + /* Skip separators */ + while (pos < t->len) { + if (t->start[pos] == '\n' || t->start[pos] == '\r') { + pos++; + } + else { + break; + } + } + + /* Update pos */ + lua_pushinteger(L, pos); + lua_replace(L, lua_upvalueindex(3)); + + return 1; +} + +static gint +lua_text_lines(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + gboolean stringify = FALSE; + + if (t) { + if (lua_isboolean(L, 2)) { + stringify = lua_toboolean(L, 2); + } + + lua_pushvalue(L, 1); + lua_pushboolean(L, stringify); + lua_pushinteger(L, 0); /* Current pos */ + lua_pushcclosure(L, rspamd_lua_text_readline, 3); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +rspamd_lua_text_regexp_split(lua_State *L) +{ + struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1)), + *new_t; + struct rspamd_lua_regexp *re = *(struct rspamd_lua_regexp **) + lua_touserdata(L, lua_upvalueindex(2)); + gboolean stringify = lua_toboolean(L, lua_upvalueindex(3)); + gint64 pos = lua_tointeger(L, lua_upvalueindex(4)); + gboolean matched; + + if (pos < 0) { + return luaL_error(L, "invalid pos: %d", (gint) pos); + } + + if (pos >= t->len) { + /* We are done */ + return 0; + } + + const gchar *start, *end, *old_start; + + end = t->start + pos; + + for (;;) { + old_start = end; + + matched = rspamd_regexp_search(re->re, t->start, t->len, &start, &end, FALSE, + NULL); + + if (matched) { + if (start - old_start > 0) { + if (stringify) { + lua_pushlstring(L, old_start, start - old_start); + } + else { + new_t = lua_newuserdata(L, sizeof(*t)); + rspamd_lua_setclass(L, "rspamd{text}", -1); + new_t->start = old_start; + new_t->len = start - old_start; + new_t->flags = 0; + } + + break; + } + else { + if (start == end) { + matched = FALSE; + break; + } + /* + * All match separators (e.g. starting separator, + * we need to skip it). Continue iterations. + */ + } + } + else { + /* No match, stop */ + break; + } + } + + if (!matched && (t->len > 0 && (end == NULL || end < t->start + t->len))) { + /* No more matches, but we might need to push the last element */ + if (end == NULL) { + end = t->start; + } + /* No separators, need to push the whole remaining part */ + if (stringify) { + lua_pushlstring(L, end, (t->start + t->len) - end); + } + else { + new_t = lua_newuserdata(L, sizeof(*t)); + rspamd_lua_setclass(L, "rspamd{text}", -1); + new_t->start = end; + new_t->len = (t->start + t->len) - end; + new_t->flags = 0; + } + + pos = t->len; + } + else { + + pos = end - t->start; + } + + /* Update pos */ + lua_pushinteger(L, pos); + lua_replace(L, lua_upvalueindex(4)); + + return 1; +} + +static gint +lua_text_split(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + struct rspamd_lua_regexp *re; + gboolean stringify = FALSE, own_re = FALSE; + + if (t == NULL) { + return luaL_error(L, "invalid arguments"); + } + + if (lua_type(L, 2) == LUA_TUSERDATA) { + re = lua_check_regexp(L, 2); + } + else { + rspamd_regexp_t *c_re; + GError *err = NULL; + + c_re = rspamd_regexp_new(lua_tostring(L, 2), NULL, &err); + if (c_re == NULL) { + + gint ret = luaL_error(L, "cannot parse regexp: %s, error: %s", + lua_tostring(L, 2), + err == NULL ? "undefined" : err->message); + if (err) { + g_error_free(err); + } + + return ret; + } + + re = g_malloc0(sizeof(struct rspamd_lua_regexp)); + re->re = c_re; + re->re_pattern = g_strdup(lua_tostring(L, 2)); + re->module = rspamd_lua_get_module_name(L); + own_re = TRUE; + } + + if (re) { + if (lua_isboolean(L, 3)) { + stringify = lua_toboolean(L, 3); + } + + /* Upvalues */ + lua_pushvalue(L, 1); /* text */ + + if (own_re) { + struct rspamd_lua_regexp **pre; + pre = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *)); + rspamd_lua_setclass(L, "rspamd{regexp}", -1); + *pre = re; + } + else { + lua_pushvalue(L, 2); /* regexp */ + } + + lua_pushboolean(L, stringify); + lua_pushinteger(L, 0); /* Current pos */ + lua_pushcclosure(L, rspamd_lua_text_regexp_split, 4); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + + +static gint +lua_text_at(lua_State *L) +{ + return lua_text_byte(L); +} + +static gint +lua_text_byte(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + if (!t) { + return luaL_error(L, "invalid arguments"); + } + + gsize start = relative_pos_start(luaL_optinteger(L, 2, 1), t->len); + gsize end = relative_pos_end(luaL_optinteger(L, 3, start), t->len); + start--; + + if (start >= end) { + return 0; + } + + for (gsize i = start; i < end; i++) { + lua_pushinteger(L, t->start[i]); + } + return end - start; +} + +static gint +lua_text_memchr(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + int c; + bool reverse = false; + + if (lua_isnumber(L, 2)) { + c = lua_tonumber(L, 2); + } + else { + gsize l; + const gchar *str = lua_tolstring(L, 2, &l); + + if (str) { + c = str[0]; + + if (l != 1) { + return luaL_error(L, "need exactly one character to search"); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + } + + if (t) { + void *f; + + if (lua_isboolean(L, 3)) { + reverse = lua_toboolean(L, 3); + } + + if (reverse) { + f = rspamd_memrchr(t->start, c, t->len); + } + else { + f = memchr(t->start, c, t->len); + } + + if (f) { + lua_pushinteger(L, ((const char *) f) - t->start + 1); + } + else { + lua_pushinteger(L, -1); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_bytes(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + + if (t) { + lua_createtable(L, t->len, 0); + + for (gsize i = 0; i < t->len; i++) { + lua_pushinteger(L, (guchar) t->start[i]); + lua_rawseti(L, -2, i + 1); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_save_in_file(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + const gchar *fname = NULL; + guint mode = 00644; + gint fd = -1; + gboolean need_close = FALSE; + + if (t != NULL) { + if (lua_type(L, 2) == LUA_TSTRING) { + fname = luaL_checkstring(L, 2); + + if (lua_type(L, 3) == LUA_TNUMBER) { + mode = lua_tointeger(L, 3); + } + } + else if (lua_type(L, 2) == LUA_TNUMBER) { + /* Created fd */ + fd = lua_tointeger(L, 2); + } + + if (fd == -1) { + if (fname) { + fd = rspamd_file_xopen(fname, O_CREAT | O_WRONLY | O_EXCL, mode, 0); + + if (fd == -1) { + lua_pushboolean(L, false); + lua_pushstring(L, strerror(errno)); + + return 2; + } + need_close = TRUE; + } + else { + fd = STDOUT_FILENO; + } + } + + if (write(fd, t->start, t->len) == -1) { + if (fd != STDOUT_FILENO) { + close(fd); + } + + lua_pushboolean(L, false); + lua_pushstring(L, strerror(errno)); + + return 2; + } + + if (need_close) { + close(fd); + } + + lua_pushboolean(L, true); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_gc(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + + if (t != NULL) { + g_assert(!(t->flags & RSPAMD_TEXT_FLAG_FAKE)); + + if (t->flags & RSPAMD_TEXT_FLAG_OWN) { + if (t->flags & RSPAMD_TEXT_FLAG_WIPE) { + rspamd_explicit_memzero((guchar *) t->start, t->len); + } + + if (t->flags & RSPAMD_TEXT_FLAG_MMAPED) { + munmap((gpointer) t->start, t->len); + } + else { + if (t->flags & RSPAMD_TEXT_FLAG_SYSMALLOC) { + free((gpointer) t->start); + } + else { + g_free((gpointer) t->start); + } + } + } + } + + return 0; +} + +static gint +lua_text_eq(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1), + *t2 = lua_check_text_or_string(L, 2); + + if (t1->len == t2->len) { + lua_pushboolean(L, memcmp(t1->start, t2->start, t1->len) == 0); + } + else { + lua_pushboolean(L, false); + } + + return 1; +} + +static gint +lua_text_lt(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1), + *t2 = lua_check_text_or_string(L, 2); + + if (t1 && t2) { + if (t1->len == t2->len) { + lua_pushboolean(L, memcmp(t1->start, t2->start, t1->len) < 0); + } + else { + lua_pushboolean(L, t1->len < t2->len); + } + } + + return 1; +} + +static gint +lua_text_concat(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1), + *t2 = lua_check_text_or_string(L, 2); + + if (t1 && t2) { + struct rspamd_lua_text *final; + + final = lua_new_text(L, NULL, t1->len + t2->len, TRUE); + memcpy((void *) final->start, t1->start, t1->len); + memcpy((void *) (final->start + t1->len), t2->start, t2->len); + } + + return 1; +} + +static gint +lua_text_wipe(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + + if (t != NULL) { + if (t->flags & RSPAMD_TEXT_FLAG_OWN) { + rspamd_explicit_memzero((guchar *) t->start, t->len); + } + else { + return luaL_error(L, "cannot wipe not owned text"); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 0; +} + +static gint +lua_text_base32(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1), *out; + enum rspamd_base32_type btype = RSPAMD_BASE32_DEFAULT; + + if (t != NULL) { + if (lua_type(L, 2) == LUA_TSTRING) { + btype = rspamd_base32_decode_type_from_str(lua_tostring(L, 2)); + + if (btype == RSPAMD_BASE32_INVALID) { + return luaL_error(L, "invalid b32 type: %s", lua_tostring(L, 2)); + } + } + + out = lua_new_text(L, NULL, t->len * 8 / 5 + 2, TRUE); + out->len = rspamd_encode_base32_buf(t->start, t->len, (gchar *) out->start, + out->len, btype); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_base64(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1), *out; + gsize line_len = 0; + gboolean fold = FALSE; + + if (t != NULL) { + if (lua_type(L, 2) == LUA_TNUMBER) { + line_len = lua_tointeger(L, 2); + + if (line_len <= 8) { + return luaL_error(L, "too small line length (at least 8 is required)"); + } + } + + enum rspamd_newlines_type how = RSPAMD_TASK_NEWLINES_CRLF; + + if (lua_type(L, 3) == LUA_TSTRING) { + const gchar *how_str = lua_tostring(L, 3); + + if (g_ascii_strcasecmp(how_str, "cr") == 0) { + how = RSPAMD_TASK_NEWLINES_CR; + } + else if (g_ascii_strcasecmp(how_str, "lf") == 0) { + how = RSPAMD_TASK_NEWLINES_LF; + } + else if (g_ascii_strcasecmp(how_str, "crlf") != 0) { + return luaL_error(L, "invalid newline style: %s", how_str); + } + } + + if (lua_type(L, 4) == LUA_TBOOLEAN) { + fold = lua_toboolean(L, 4); + } + + gsize sz_len; + + out = lua_newuserdata(L, sizeof(*t)); + out->flags = RSPAMD_TEXT_FLAG_OWN; + out->start = rspamd_encode_base64_common(t->start, t->len, + line_len, &sz_len, fold, how); + out->len = sz_len; + rspamd_lua_setclass(L, "rspamd{text}", -1); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_hex(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1), *out; + + if (t != NULL) { + + out = lua_new_text(L, NULL, t->len * 2, TRUE); + out->len = rspamd_encode_hex_buf(t->start, t->len, (gchar *) out->start, + out->len); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_find(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + gsize patlen, init = 1; + const gchar *pat = luaL_checklstring(L, 2, &patlen); + + if (t != NULL && pat != NULL) { + + if (lua_isnumber(L, 3)) { + init = relative_pos_start(lua_tointeger(L, 3), t->len); + } + + init--; + + if (init > t->len) { + return luaL_error(L, "invalid arguments to find: init too large"); + } + + goffset pos = rspamd_substring_search(t->start + init, + t->len - init, + pat, patlen); + + if (pos == -1) { + lua_pushnil(L); + + return 1; + } + + lua_pushinteger(L, pos + 1); + lua_pushinteger(L, pos + patlen); + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 2; +} + +#define BITOP(a, b, op) \ + ((a)[(guint64) (b) / (8u * sizeof *(a))] op(guint64) 1 << ((guint64) (b) % (8u * sizeof *(a)))) + +static gint +lua_text_exclude_chars(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + gssize patlen; + const gchar *pat = lua_tolstring(L, 2, &patlen), *p, *end; + gchar *dest, *d; + guint64 byteset[32 / sizeof(guint64)]; /* Bitset for ascii */ + gboolean copy = TRUE; + guint *plen; + + if (t != NULL && pat && patlen > 0) { + if (lua_isboolean(L, 3)) { + copy = lua_toboolean(L, 3); + } + else if (t->flags & RSPAMD_TEXT_FLAG_OWN) { + copy = FALSE; + } + + if (!copy) { + dest = (gchar *) t->start; + plen = &t->len; + lua_pushvalue(L, 1); /* Push text as a result */ + } + else { + /* We need to copy read only text */ + struct rspamd_lua_text *nt; + + dest = g_malloc(t->len); + nt = lua_newuserdata(L, sizeof(*nt)); + rspamd_lua_setclass(L, "rspamd{text}", -1); + nt->len = t->len; + nt->flags = RSPAMD_TEXT_FLAG_OWN; + memcpy(dest, t->start, t->len); + nt->start = dest; + plen = &nt->len; + } + + /* Fill pattern bitset */ + memset(byteset, 0, sizeof byteset); + + while (patlen > 0) { + if (*pat == '%') { + pat++; + patlen--; + + if (patlen > 0) { + /* + * This stuff assumes little endian, but GUINT64_FROM_LE should + * deal with proper conversion + */ + switch (*pat) { + case '%': + BITOP(byteset, *(guchar *) pat, |=); + break; + case 's': + /* "\r\n\t\f " */ + byteset[0] |= GUINT64_FROM_LE(0x100003600LLU); + break; + case 'n': + /* newlines: "\r\n" */ + byteset[0] |= GUINT64_FROM_LE(0x2400LLU); + break; + case '8': + /* 8 bit characters */ + byteset[2] |= GUINT64_FROM_LE(0xffffffffffffffffLLU); + byteset[3] |= GUINT64_FROM_LE(0xffffffffffffffffLLU); + break; + case 'c': + /* Non printable (control) characters */ + byteset[0] |= GUINT64_FROM_LE(0xffffffffLLU); + /* Del character */ + byteset[1] |= GUINT64_FROM_LE(0x8000000000000000LLU); + break; + } + } + else { + /* Last '%' */ + BITOP(byteset, (guchar) '%', |=); + } + } + else { + BITOP(byteset, *(guchar *) pat, |=); + } + + pat++; + patlen--; + } + for (; patlen > 0 && BITOP(byteset, *(guchar *) pat, |=); pat++, patlen--) + ; + + p = t->start; + end = t->start + t->len; + d = dest; + + while (p < end) { + if (!BITOP(byteset, *(guchar *) p, &)) { + *d++ = *p; + } + + p++; + } + + *(plen) = d - dest; + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_oneline(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + const gchar *p, *end; + gchar *dest, *d; + guint64 byteset[32 / sizeof(guint64)]; /* Bitset for ascii */ + gboolean copy = TRUE, seen_8bit = FALSE; + guint *plen; + + if (t != NULL) { + if (lua_isboolean(L, 2)) { + copy = lua_toboolean(L, 2); + } + else if (t->flags & RSPAMD_TEXT_FLAG_OWN) { + copy = FALSE; + } + + if (!copy) { + dest = (gchar *) t->start; + plen = &t->len; + lua_pushvalue(L, 1); /* Push text as a result */ + } + else { + /* We need to copy read only text */ + struct rspamd_lua_text *nt; + + dest = g_malloc(t->len); + nt = lua_newuserdata(L, sizeof(*nt)); + rspamd_lua_setclass(L, "rspamd{text}", -1); + nt->len = t->len; + nt->flags = RSPAMD_TEXT_FLAG_OWN; + memcpy(dest, t->start, t->len); + nt->start = dest; + plen = &nt->len; + } + + /* Fill pattern bitset */ + memset(byteset, 0, sizeof byteset); + /* All spaces */ + byteset[0] |= GUINT64_FROM_LE(0x100003600LLU); + /* Control characters */ + byteset[0] |= GUINT64_FROM_LE(0xffffffffLLU); + /* Del character */ + byteset[1] |= GUINT64_FROM_LE(0x8000000000000000LLU); + /* 8 bit characters */ + byteset[2] |= GUINT64_FROM_LE(0xffffffffffffffffLLU); + byteset[3] |= GUINT64_FROM_LE(0xffffffffffffffffLLU); + + p = t->start; + end = t->start + t->len; + d = dest; + + while (p < end) { + if (!BITOP(byteset, *(guchar *) p, &)) { + *d++ = *p; + } + else { + if ((*(guchar *) p) & 0x80) { + seen_8bit = TRUE; + *d++ = *p; + } + else { + if (*p == ' ') { + if (d != dest) { + *d++ = *p++; + } + + while (p < end && g_ascii_isspace(*p)) { + p++; + } + + continue; /* To avoid p++ */ + } + else if (*p == '\r' || *p == '\n') { + if (d != dest) { + *d++ = ' '; + p++; + } + + while (p < end && g_ascii_isspace(*p)) { + p++; + } + + continue; /* To avoid p++ */ + } + } + } + + p++; + } + + while (d > dest && g_ascii_isspace(*(d - 1))) { + d--; + } + + if (seen_8bit) { + if (rspamd_fast_utf8_validate(dest, d - dest) != 0) { + /* Need to make it valid :( */ + UChar32 uc; + goffset err_offset; + gsize remain = d - dest; + gchar *nd = dest; + + while (remain > 0 && (err_offset = rspamd_fast_utf8_validate(nd, remain)) > 0) { + gint i = 0; + + err_offset--; /* As it returns it 1 indexed */ + nd += err_offset; + remain -= err_offset; + + /* Each invalid character of input requires 3 bytes of output (+2 bytes) */ + while (i < remain) { + gint old_pos = i; + U8_NEXT(nd, i, remain, uc); + + if (uc < 0) { + nd[old_pos] = '?'; + } + else { + break; + } + } + + nd += i; + remain -= i; + } + } + } + + *(plen) = d - dest; + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_lower(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1), *nt; + gboolean is_utf8 = FALSE, is_inplace = FALSE; + + if (t != NULL) { + if (lua_isboolean(L, 2)) { + is_utf8 = lua_toboolean(L, 2); + } + if (lua_isboolean(L, 3)) { + is_inplace = lua_toboolean(L, 3); + } + + if (is_inplace) { + nt = t; + lua_pushvalue(L, 1); + } + else { + nt = lua_new_text(L, t->start, t->len, TRUE); + } + + if (!is_utf8) { + rspamd_str_lc((gchar *) nt->start, nt->len); + } + else { + rspamd_str_lc_utf8((gchar *) nt->start, nt->len); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +static gint +lua_text_strtoul(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_lua_text *t = lua_check_text(L, 1); + + if (t) { + unsigned long ll; + + if (rspamd_strtoul(t->start, t->len, &ll)) { + lua_pushinteger(L, ll); + } + else { + lua_pushnil(L); + } + } + else { + return luaL_error(L, "invalid arguments"); + } + + return 1; +} + +/* Used to distinguish lua text metatable */ +static const guint rspamd_lua_text_cookie = 0x2b21ef6fU; + +static gint +lua_load_text(lua_State *L) +{ + lua_newtable(L); + lua_pushstring(L, "cookie"); + lua_pushnumber(L, rspamd_lua_text_cookie); + lua_settable(L, -3); + luaL_register(L, NULL, textlib_f); + + return 1; +} + +void luaopen_text(lua_State *L) +{ + rspamd_lua_new_class(L, "rspamd{text}", textlib_m); + lua_pushstring(L, "cookie"); + lua_pushnumber(L, rspamd_lua_text_cookie); + lua_settable(L, -3); + lua_pop(L, 1); + + rspamd_lua_add_preload(L, "rspamd_text", lua_load_text); +} -- cgit v1.2.3