summaryrefslogtreecommitdiffstats
path: root/src/lua/lua_text.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
commit133a45c109da5310add55824db21af5239951f93 (patch)
treeba6ac4c0a950a0dda56451944315d66409923918 /src/lua/lua_text.c
parentInitial commit. (diff)
downloadrspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/lua/lua_text.c')
-rw-r--r--src/lua/lua_text.c1789
1 files changed, 1789 insertions, 0 deletions
diff --git a/src/lua/lua_text.c b/src/lua/lua_text.c
new file mode 100644
index 0000000..26a5c08
--- /dev/null
+++ b/src/lua/lua_text.c
@@ -0,0 +1,1789 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lua_common.h"
+#include "libcryptobox/cryptobox.h"
+#include "contrib/fastutf8/fastutf8.h"
+#include "unix-std.h"
+
+/***
+ * @module rspamd_text
+ * This module provides access to opaque text structures used widely to prevent
+ * copying between Lua and C for various concerns: performance, security etc...
+ *
+ * You can convert rspamd_text into string but it will copy data.
+ */
+
+/***
+ * @function rspamd_text.fromstring(str)
+ * Creates rspamd_text from Lua string (copied to the text)
+ * @param {string} str string to use
+ * @return {rspamd_text} resulting text
+ */
+LUA_FUNCTION_DEF(text, fromstring);
+
+/***
+ * @function rspamd_text.null()
+ * Creates rspamd_text with NULL pointer for testing purposes
+ * @param {string} str string to use
+ * @return {rspamd_text} resulting text
+ */
+LUA_FUNCTION_DEF(text, null);
+/***
+ * @function rspamd_text.randombytes(nbytes)
+ * Creates rspamd_text with random bytes inside (raw bytes)
+ * @param {number} nbytes number of random bytes generated
+ * @return {rspamd_text} random bytes text
+ */
+LUA_FUNCTION_DEF(text, randombytes);
+
+/***
+ * @function rspamd_text.fromtable(tbl[, delim])
+ * Same as `table.concat` but generates rspamd_text instead of the Lua string
+ * @param {table} tbl table to use
+ * @param {string} delim optional delimiter
+ * @return {rspamd_text} resulting text
+ */
+LUA_FUNCTION_DEF(text, fromtable);
+/***
+ * @method rspamd_text:byte(pos[, pos2])
+ * Returns a byte at the position `pos` or bytes from `pos` to `pos2` if specified
+ * @param {integer} pos index
+ * @param {integer} pos2 index
+ * @return {integer} byte at the position `pos` or varargs of bytes
+ */
+LUA_FUNCTION_DEF(text, byte);
+/***
+ * @method rspamd_text:len()
+ * Returns length of a string
+ * @return {number} length of string in **bytes**
+ */
+LUA_FUNCTION_DEF(text, len);
+/***
+ * @method rspamd_text:str()
+ * Converts text to string by copying its content
+ * @return {string} copy of text as Lua string
+ */
+LUA_FUNCTION_DEF(text, str);
+/***
+ * @method rspamd_text:ptr()
+ * Converts text to lightuserdata
+ * @return {lightuserdata} pointer value of rspamd_text
+ */
+LUA_FUNCTION_DEF(text, ptr);
+/***
+ * @method rspamd_text:save_in_file(fname[, mode])
+ * Saves text in file
+ * @return {boolean} true if save has been completed
+ */
+LUA_FUNCTION_DEF(text, save_in_file);
+/***
+ * @method rspamd_text:span(start[, len])
+ * Returns a span for lua_text starting at pos [start] (1 indexed) and with
+ * length `len` (or to the end of the text)
+ * @param {integer} start start index
+ * @param {integer} len length of span
+ * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
+ */
+LUA_FUNCTION_DEF(text, span);
+/***
+ * @method rspamd_text:sub(start[, len])
+ * Returns a substring for lua_text similar to string.sub from Lua
+ * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
+ */
+LUA_FUNCTION_DEF(text, sub);
+/***
+ * @method rspamd_text:lines([stringify])
+ * Returns an iter over all lines as rspamd_text objects or as strings if `stringify` is true
+ * @param {boolean} stringify stringify lines
+ * @return {iterator} iterator triplet
+ */
+LUA_FUNCTION_DEF(text, lines);
+/***
+ * @method rspamd_text:split(regexp, [stringify])
+ * Returns an iter over all encounters of the specific regexp as rspamd_text objects or as strings if `stringify` is true
+ * @param {rspamd_regexp} regexp regexp (pcre syntax) used for splitting
+ * @param {boolean} stringify stringify lines
+ * @return {iterator} iterator triplet
+ */
+LUA_FUNCTION_DEF(text, split);
+/***
+ * @method rspamd_text:at(pos)
+ * Returns a byte at the position `pos`
+ * @param {integer} pos index
+ * @return {integer} byte at the position `pos` or nil if pos out of bound
+ */
+LUA_FUNCTION_DEF(text, at);
+/***
+ * @method rspamd_text:memchr(chr, [reverse])
+ * Returns the first or the last position of the character `chr` in the text or
+ * -1 in case if a character has not been found. Indexes start from `1`
+ * @param {string/number} chr character or a character code to find
+ * @param {boolean} reverse last character if `true`
+ * @return {integer} position of the character or `-1`
+ */
+LUA_FUNCTION_DEF(text, memchr);
+/***
+ * @method rspamd_text:bytes()
+ * Converts text to an array of bytes
+ * @return {table|integer} bytes in the array (as unsigned char)
+ */
+LUA_FUNCTION_DEF(text, bytes);
+/***
+ * @method rspamd_text:lower([is_utf, [inplace]])
+ * Return a new text with lowercased characters, if is_utf is true then Rspamd applies utf8 lowercase
+ * @param {boolean} is_utf apply utf8 lowercase
+ * @param {boolean} inplace lowercase the original text
+ * @return {rspamd_text} new rspamd_text (or the original text if inplace) with lowercased letters
+ */
+LUA_FUNCTION_DEF(text, lower);
+LUA_FUNCTION_DEF(text, take_ownership);
+/***
+ * @method rspamd_text:exclude_chars(set_to_exclude, [always_copy])
+ * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
+ * where all chars from `set_to_exclude` are removed
+ * Patterns supported:
+ *
+ * - %s - all space characters
+ * - %n - all newline characters
+ * - %c - all control characters (it includes 8bit characters and spaces)
+ * - %8 - all 8 bit characters
+ * - %% - just a percent character
+ *
+ * @param {string} set_to_exclude characters to exclude
+ * @param {boolean} always_copy always copy the source text
+ * @return {rspamd_text} modified or copied text
+ */
+LUA_FUNCTION_DEF(text, exclude_chars);
+/***
+ * @method rspamd_text:oneline([always_copy])
+ * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
+ * where the following transformations are made:
+ * - All spaces sequences are replaced with a single space
+ * - All newlines sequences are replaced with a single space
+ * - Trailing and leading spaces are removed
+ * - Control characters are excluded
+ * - UTF8 sequences are normalised
+ *
+ * @param {boolean} always_copy always copy the source text
+ * @return {rspamd_text} modified or copied text
+ */
+LUA_FUNCTION_DEF(text, oneline);
+/***
+ * @method rspamd_text:base32([b32type])
+ * Returns a text encoded in base32 (new rspamd_text is allocated)
+ *
+ * @param {string} b32type base32 type (default, bleach, rfc)
+ * @return {rspamd_text} new text encoded in base32
+ */
+LUA_FUNCTION_DEF(text, base32);
+/***
+ * @method rspamd_text:base64([line_length, [nline, [fold]]])
+ * Returns a text encoded in base64 (new rspamd_text is allocated)
+ *
+ * @param {number} line_length return text split with newlines up to this attribute
+ * @param {string} nline newline type: `cr`, `lf`, `crlf`
+ * @param {boolean} fold use folding when splitting into lines (false by default)
+ * @return {rspamd_text} new text encoded in base64
+ */
+LUA_FUNCTION_DEF(text, base64);
+/***
+ * @method rspamd_text:hex()
+ * Returns a text encoded in hex (new rspamd_text is allocated)
+ *
+ * @return {rspamd_text} new text encoded in hex
+ */
+LUA_FUNCTION_DEF(text, hex);
+/***
+ * @method rspamd_text:find(pattern [, init])
+ * Looks for the first match of pattern in the string s.
+ * If it finds a match, then find returns the indices of s where this occurrence
+ * starts and ends; otherwise, it returns nil. A third,
+ * optional numerical argument init specifies where to start the search;
+ * its default value is 1 and can be negative.
+ * This method currently supports merely a plain search, no patterns.
+ *
+ * @param {string} pattern pattern to find
+ * @param {number} init specifies where to start the search (1 default)
+ * @return {number,number/nil} If it finds a match, then find returns the indices of s where this occurrence starts and ends; otherwise, it returns nil
+ */
+LUA_FUNCTION_DEF(text, find);
+LUA_FUNCTION_DEF(text, gc);
+LUA_FUNCTION_DEF(text, eq);
+LUA_FUNCTION_DEF(text, lt);
+LUA_FUNCTION_DEF(text, concat);
+LUA_FUNCTION_DEF(text, strtoul);
+
+static const struct luaL_reg textlib_f[] = {
+ LUA_INTERFACE_DEF(text, fromstring),
+ {"from_string", lua_text_fromstring},
+ LUA_INTERFACE_DEF(text, fromtable),
+ {"from_table", lua_text_fromtable},
+ LUA_INTERFACE_DEF(text, null),
+ LUA_INTERFACE_DEF(text, randombytes),
+ {NULL, NULL}};
+
+static const struct luaL_reg textlib_m[] = {
+ LUA_INTERFACE_DEF(text, len),
+ LUA_INTERFACE_DEF(text, str),
+ LUA_INTERFACE_DEF(text, ptr),
+ LUA_INTERFACE_DEF(text, take_ownership),
+ LUA_INTERFACE_DEF(text, save_in_file),
+ LUA_INTERFACE_DEF(text, span),
+ LUA_INTERFACE_DEF(text, sub),
+ LUA_INTERFACE_DEF(text, lines),
+ LUA_INTERFACE_DEF(text, split),
+ LUA_INTERFACE_DEF(text, at),
+ LUA_INTERFACE_DEF(text, memchr),
+ LUA_INTERFACE_DEF(text, byte),
+ LUA_INTERFACE_DEF(text, bytes),
+ LUA_INTERFACE_DEF(text, lower),
+ LUA_INTERFACE_DEF(text, exclude_chars),
+ LUA_INTERFACE_DEF(text, oneline),
+ LUA_INTERFACE_DEF(text, base32),
+ LUA_INTERFACE_DEF(text, base64),
+ LUA_INTERFACE_DEF(text, hex),
+ LUA_INTERFACE_DEF(text, find),
+ LUA_INTERFACE_DEF(text, strtoul),
+ {"write", lua_text_save_in_file},
+ {"__len", lua_text_len},
+ {"__tostring", lua_text_str},
+ {"__gc", lua_text_gc},
+ {"__eq", lua_text_eq},
+ {"__lt", lua_text_lt},
+ {"__concat", lua_text_concat},
+ {NULL, NULL}};
+
+struct rspamd_lua_text *
+lua_check_text(lua_State *L, gint pos)
+{
+ void *ud = rspamd_lua_check_udata(L, pos, "rspamd{text}");
+ luaL_argcheck(L, ud != NULL, pos, "'text' expected");
+ return ud ? (struct rspamd_lua_text *) ud : NULL;
+}
+
+struct rspamd_lua_text *
+lua_check_text_or_string(lua_State *L, gint pos)
+{
+ gint pos_type = lua_type(L, pos);
+
+ if (pos_type == LUA_TUSERDATA) {
+ void *ud = rspamd_lua_check_udata(L, pos, "rspamd{text}");
+ luaL_argcheck(L, ud != NULL, pos, "'text' expected");
+ return ud ? (struct rspamd_lua_text *) ud : NULL;
+ }
+ else if (pos_type == LUA_TSTRING) {
+ /*
+ * Fake static lua_text, we allow to use this function multiple times
+ * by having a small array of static structures.
+ */
+ static unsigned cur_txt_idx = 0;
+ static struct rspamd_lua_text fake_text[4];
+ gsize len;
+ int sel_idx;
+
+ sel_idx = cur_txt_idx++ % G_N_ELEMENTS(fake_text);
+ fake_text[sel_idx].start = lua_tolstring(L, pos, &len);
+
+ if (len >= G_MAXUINT) {
+ return NULL;
+ }
+
+ fake_text[sel_idx].len = len;
+ fake_text[sel_idx].flags = RSPAMD_TEXT_FLAG_FAKE;
+
+ return &fake_text[sel_idx];
+ }
+
+ return NULL;
+}
+
+struct rspamd_lua_text *
+lua_new_text(lua_State *L, const gchar *start, gsize len, gboolean own)
+{
+ struct rspamd_lua_text *t;
+
+ t = lua_newuserdata(L, sizeof(*t));
+ t->flags = 0;
+
+ if (own) {
+ gchar *storage;
+
+ if (len > 0) {
+ storage = g_malloc(len);
+
+ if (start != NULL) {
+ memcpy(storage, start, len);
+ }
+
+ t->start = storage;
+ t->flags = RSPAMD_TEXT_FLAG_OWN;
+ }
+ else {
+ t->start = "";
+ }
+ }
+ else {
+ t->start = start;
+ }
+
+ t->len = len;
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+
+ return t;
+}
+
+struct rspamd_lua_text *
+lua_new_text_task(lua_State *L, struct rspamd_task *task,
+ const gchar *start, gsize len, gboolean own)
+{
+ struct rspamd_lua_text *t;
+
+ t = lua_newuserdata(L, sizeof(*t));
+ t->flags = 0;
+
+ if (own) {
+ gchar *storage;
+
+ if (len > 0) {
+ storage = rspamd_mempool_alloc(task->task_pool, len);
+
+ if (start != NULL) {
+ memcpy(storage, start, len);
+ }
+
+ t->start = storage;
+ }
+ else {
+ t->start = "";
+ }
+ }
+ else {
+ t->start = start;
+ }
+
+ t->len = len;
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+
+ return t;
+}
+
+bool lua_is_text_binary(struct rspamd_lua_text *t)
+{
+ if (t == NULL || t->len == 0) {
+ return false;
+ }
+
+ if (rspamd_str_has_8bit(t->start, t->len)) {
+ if (rspamd_fast_utf8_validate(t->start, t->len) == 0) {
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+}
+
+
+static gint
+lua_text_fromstring(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ const gchar *str;
+ gsize l = 0;
+ gboolean transparent = FALSE;
+
+ str = luaL_checklstring(L, 1, &l);
+
+ if (str) {
+ if (lua_isboolean(L, 2)) {
+ transparent = lua_toboolean(L, 2);
+ }
+
+ lua_new_text(L, str, l, !transparent);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+
+ return 1;
+}
+
+static gint
+lua_text_null(lua_State *L)
+{
+ LUA_TRACE_POINT;
+
+ lua_new_text(L, NULL, 0, false);
+
+ return 1;
+}
+
+static gint
+lua_text_randombytes(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ guint nbytes = luaL_checkinteger(L, 1);
+ struct rspamd_lua_text *out;
+
+ out = lua_new_text(L, NULL, nbytes, TRUE);
+ randombytes_buf((char *) out->start, nbytes);
+ out->len = nbytes;
+
+ return 1;
+}
+
+#define MAX_REC 10
+
+static void
+lua_text_tbl_length(lua_State *L, gsize dlen, gsize *dest, guint rec)
+{
+ gsize tblen, stlen;
+ struct rspamd_lua_text *elt;
+
+ if (rec > MAX_REC) {
+ luaL_error(L, "lua_text_tbl_length: recursion limit exceeded");
+
+ return;
+ }
+
+ tblen = rspamd_lua_table_size(L, -1);
+
+ for (gsize i = 0; i < tblen; i++) {
+ lua_rawgeti(L, -1, i + 1);
+
+ if (lua_type(L, -1) == LUA_TSTRING) {
+#if LUA_VERSION_NUM >= 502
+ stlen = lua_rawlen(L, -1);
+#else
+ stlen = lua_objlen(L, -1);
+#endif
+ (*dest) += stlen;
+ }
+ else if (lua_type(L, -1) == LUA_TUSERDATA) {
+ elt = (struct rspamd_lua_text *) lua_touserdata(L, -1);
+
+ if (elt) {
+ (*dest) += elt->len;
+ }
+ }
+ else if (lua_type(L, -1) == LUA_TTABLE) {
+ lua_text_tbl_length(L, dlen, dest, rec + 1);
+ }
+
+ if (i != tblen - 1) {
+ (*dest) += dlen;
+ }
+
+ lua_pop(L, 1);
+ }
+}
+
+static void
+lua_text_tbl_append(lua_State *L,
+ const gchar *delim,
+ gsize dlen,
+ gchar **dest,
+ guint rec)
+{
+ const gchar *st;
+ gsize tblen, stlen;
+ struct rspamd_lua_text *elt;
+
+ if (rec > MAX_REC) {
+ luaL_error(L, "lua_text_tbl_length: recursion limit exceeded");
+
+ return;
+ }
+
+ tblen = rspamd_lua_table_size(L, -1);
+
+ for (guint i = 0; i < tblen; i++) {
+ lua_rawgeti(L, -1, i + 1);
+
+ if (lua_type(L, -1) == LUA_TSTRING) {
+ st = lua_tolstring(L, -1, &stlen);
+ memcpy((*dest), st, stlen);
+ (*dest) += stlen;
+ }
+ else if (lua_type(L, -1) == LUA_TUSERDATA) {
+ elt = (struct rspamd_lua_text *) lua_touserdata(L, -1);
+
+ if (elt) {
+ memcpy((*dest), elt->start, elt->len);
+ (*dest) += elt->len;
+ }
+ }
+ else if (lua_type(L, -1) == LUA_TTABLE) {
+ lua_text_tbl_append(L, delim, dlen, dest, rec + 1);
+ }
+
+ if (dlen && i != tblen - 1) {
+ memcpy((*dest), delim, dlen);
+ (*dest) += dlen;
+ }
+
+ lua_pop(L, 1);
+ }
+}
+
+static gint
+lua_text_fromtable(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ const gchar *delim = "";
+ struct rspamd_lua_text *t;
+ gsize textlen = 0, dlen, oldtop = lua_gettop(L);
+ gchar *dest;
+
+ if (!lua_istable(L, 1)) {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ if (lua_type(L, 2) == LUA_TSTRING) {
+ delim = lua_tolstring(L, 2, &dlen);
+ }
+ else {
+ dlen = 0;
+ }
+
+ /* Calculate length needed */
+ lua_pushvalue(L, 1);
+ lua_text_tbl_length(L, dlen, &textlen, 0);
+ lua_pop(L, 1);
+
+ /* Allocate new text */
+ t = lua_newuserdata(L, sizeof(*t));
+ dest = g_malloc(textlen);
+ t->start = dest;
+ t->len = textlen;
+ t->flags = RSPAMD_TEXT_FLAG_OWN;
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+
+ lua_pushvalue(L, 1);
+ lua_text_tbl_append(L, delim, dlen, &dest, 0);
+ lua_pop(L, 1); /* Table arg */
+
+ gint newtop = lua_gettop(L);
+ g_assert(newtop == oldtop + 1);
+
+ return 1;
+}
+
+static gint
+lua_text_len(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ gsize l = 0;
+
+ if (t != NULL) {
+ l = t->len;
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ lua_pushinteger(L, l);
+
+ return 1;
+}
+
+static gint
+lua_text_str(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+
+ if (t != NULL) {
+ lua_pushlstring(L, t->start, t->len);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_ptr(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+
+ if (t != NULL) {
+ lua_pushlightuserdata(L, (gpointer) t->start);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_take_ownership(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ gchar *dest;
+
+ if (t != NULL) {
+ if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
+ /* We already own it */
+ lua_pushboolean(L, true);
+ }
+ else {
+ dest = g_malloc(t->len);
+ memcpy(dest, t->start, t->len);
+ t->start = dest;
+ t->flags |= RSPAMD_TEXT_FLAG_OWN;
+ lua_pushboolean(L, true);
+ }
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_span(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ gint64 start = lua_tointeger(L, 2), len = -1;
+
+ if (t && start >= 1 && start <= t->len) {
+ if (lua_isnumber(L, 3)) {
+ len = lua_tonumber(L, 3);
+ }
+
+ if (len == -1) {
+ len = t->len - (start - 1);
+ }
+
+ if (len < 0 || (len > (t->len - (start - 1)))) {
+ return luaL_error(L, "invalid length");
+ }
+
+ lua_new_text(L, t->start + (start - 1), len, FALSE);
+ }
+ else {
+ if (!t) {
+ return luaL_error(L, "invalid arguments, text required");
+ }
+ else {
+ return luaL_error(L, "invalid arguments: start offset %d "
+ "is larger than text len %d",
+ (int) start, (int) t->len);
+ }
+ }
+
+ return 1;
+}
+
+/* Helpers to behave exactly as Lua does */
+static inline gsize
+relative_pos_start(gint pos, gsize len)
+{
+ if (pos > 0) {
+ return pos;
+ }
+ else if (pos == 0) {
+ return 1;
+ }
+ else if (pos < -((gint) len)) {
+ return 1;
+ }
+
+ /* Negative pos inside str */
+ return len + ((gsize) pos) + 1;
+}
+
+static inline gsize
+relative_pos_end(gint pos, gsize len)
+{
+ if (pos > (gint) len) {
+ return len;
+ }
+ else if (pos >= 0) {
+ return (size_t) pos;
+ }
+ else if (pos < -((gint) len)) {
+ return 0;
+ }
+
+ return len + ((gsize) pos) + 1;
+}
+
+static gint
+lua_text_sub(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+
+ if (t) {
+ size_t start = relative_pos_start(luaL_checkinteger(L, 2),
+ t->len);
+ size_t end = relative_pos_end(luaL_optinteger(L, 3, -1),
+ t->len);
+
+
+ if (start <= end) {
+ lua_new_text(L, t->start + (start - 1),
+ (end - start) + 1, FALSE);
+ }
+ else {
+ lua_new_text(L, "", 0, TRUE);
+ }
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint64
+rspamd_lua_text_push_line(lua_State *L,
+ struct rspamd_lua_text *t,
+ gint64 start_offset,
+ const gchar *sep_pos,
+ gboolean stringify)
+{
+ const gchar *start;
+ gsize len;
+ gint64 ret;
+
+ start = t->start + start_offset;
+ len = sep_pos ? (sep_pos - start) : (t->len - start_offset);
+ ret = start_offset + len;
+
+ /* Trim line */
+ while (len > 0) {
+ if (start[len - 1] == '\r' || start[len - 1] == '\n') {
+ len--;
+ }
+ else {
+ break;
+ }
+ }
+
+ if (stringify) {
+ lua_pushlstring(L, start, len);
+ }
+ else {
+ struct rspamd_lua_text *ntext;
+
+ ntext = lua_newuserdata(L, sizeof(*ntext));
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+ ntext->start = start;
+ ntext->len = len;
+ ntext->flags = 0; /* Not own as it must be owned by a top object */
+ }
+
+ return ret;
+}
+
+static gint
+rspamd_lua_text_readline(lua_State *L)
+{
+ struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1));
+ gboolean stringify = lua_toboolean(L, lua_upvalueindex(2));
+ gint64 pos = lua_tointeger(L, lua_upvalueindex(3));
+
+ if (pos < 0) {
+ return luaL_error(L, "invalid pos: %d", (gint) pos);
+ }
+
+ if (pos >= t->len) {
+ /* We are done */
+ return 0;
+ }
+
+ const gchar *sep_pos;
+
+ /* We look just for `\n` ignoring `\r` as it is very rare nowadays */
+ sep_pos = memchr(t->start + pos, '\n', t->len - pos);
+
+ if (sep_pos == NULL) {
+ /* Either last `\n` or `\r` separated text */
+ sep_pos = memchr(t->start + pos, '\r', t->len - pos);
+ }
+
+ pos = rspamd_lua_text_push_line(L, t, pos, sep_pos, stringify);
+
+ /* Skip separators */
+ while (pos < t->len) {
+ if (t->start[pos] == '\n' || t->start[pos] == '\r') {
+ pos++;
+ }
+ else {
+ break;
+ }
+ }
+
+ /* Update pos */
+ lua_pushinteger(L, pos);
+ lua_replace(L, lua_upvalueindex(3));
+
+ return 1;
+}
+
+static gint
+lua_text_lines(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ gboolean stringify = FALSE;
+
+ if (t) {
+ if (lua_isboolean(L, 2)) {
+ stringify = lua_toboolean(L, 2);
+ }
+
+ lua_pushvalue(L, 1);
+ lua_pushboolean(L, stringify);
+ lua_pushinteger(L, 0); /* Current pos */
+ lua_pushcclosure(L, rspamd_lua_text_readline, 3);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+rspamd_lua_text_regexp_split(lua_State *L)
+{
+ struct rspamd_lua_text *t = lua_touserdata(L, lua_upvalueindex(1)),
+ *new_t;
+ struct rspamd_lua_regexp *re = *(struct rspamd_lua_regexp **)
+ lua_touserdata(L, lua_upvalueindex(2));
+ gboolean stringify = lua_toboolean(L, lua_upvalueindex(3));
+ gint64 pos = lua_tointeger(L, lua_upvalueindex(4));
+ gboolean matched;
+
+ if (pos < 0) {
+ return luaL_error(L, "invalid pos: %d", (gint) pos);
+ }
+
+ if (pos >= t->len) {
+ /* We are done */
+ return 0;
+ }
+
+ const gchar *start, *end, *old_start;
+
+ end = t->start + pos;
+
+ for (;;) {
+ old_start = end;
+
+ matched = rspamd_regexp_search(re->re, t->start, t->len, &start, &end, FALSE,
+ NULL);
+
+ if (matched) {
+ if (start - old_start > 0) {
+ if (stringify) {
+ lua_pushlstring(L, old_start, start - old_start);
+ }
+ else {
+ new_t = lua_newuserdata(L, sizeof(*t));
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+ new_t->start = old_start;
+ new_t->len = start - old_start;
+ new_t->flags = 0;
+ }
+
+ break;
+ }
+ else {
+ if (start == end) {
+ matched = FALSE;
+ break;
+ }
+ /*
+ * All match separators (e.g. starting separator,
+ * we need to skip it). Continue iterations.
+ */
+ }
+ }
+ else {
+ /* No match, stop */
+ break;
+ }
+ }
+
+ if (!matched && (t->len > 0 && (end == NULL || end < t->start + t->len))) {
+ /* No more matches, but we might need to push the last element */
+ if (end == NULL) {
+ end = t->start;
+ }
+ /* No separators, need to push the whole remaining part */
+ if (stringify) {
+ lua_pushlstring(L, end, (t->start + t->len) - end);
+ }
+ else {
+ new_t = lua_newuserdata(L, sizeof(*t));
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+ new_t->start = end;
+ new_t->len = (t->start + t->len) - end;
+ new_t->flags = 0;
+ }
+
+ pos = t->len;
+ }
+ else {
+
+ pos = end - t->start;
+ }
+
+ /* Update pos */
+ lua_pushinteger(L, pos);
+ lua_replace(L, lua_upvalueindex(4));
+
+ return 1;
+}
+
+static gint
+lua_text_split(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ struct rspamd_lua_regexp *re;
+ gboolean stringify = FALSE, own_re = FALSE;
+
+ if (t == NULL) {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ if (lua_type(L, 2) == LUA_TUSERDATA) {
+ re = lua_check_regexp(L, 2);
+ }
+ else {
+ rspamd_regexp_t *c_re;
+ GError *err = NULL;
+
+ c_re = rspamd_regexp_new(lua_tostring(L, 2), NULL, &err);
+ if (c_re == NULL) {
+
+ gint ret = luaL_error(L, "cannot parse regexp: %s, error: %s",
+ lua_tostring(L, 2),
+ err == NULL ? "undefined" : err->message);
+ if (err) {
+ g_error_free(err);
+ }
+
+ return ret;
+ }
+
+ re = g_malloc0(sizeof(struct rspamd_lua_regexp));
+ re->re = c_re;
+ re->re_pattern = g_strdup(lua_tostring(L, 2));
+ re->module = rspamd_lua_get_module_name(L);
+ own_re = TRUE;
+ }
+
+ if (re) {
+ if (lua_isboolean(L, 3)) {
+ stringify = lua_toboolean(L, 3);
+ }
+
+ /* Upvalues */
+ lua_pushvalue(L, 1); /* text */
+
+ if (own_re) {
+ struct rspamd_lua_regexp **pre;
+ pre = lua_newuserdata(L, sizeof(struct rspamd_lua_regexp *));
+ rspamd_lua_setclass(L, "rspamd{regexp}", -1);
+ *pre = re;
+ }
+ else {
+ lua_pushvalue(L, 2); /* regexp */
+ }
+
+ lua_pushboolean(L, stringify);
+ lua_pushinteger(L, 0); /* Current pos */
+ lua_pushcclosure(L, rspamd_lua_text_regexp_split, 4);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+
+static gint
+lua_text_at(lua_State *L)
+{
+ return lua_text_byte(L);
+}
+
+static gint
+lua_text_byte(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ if (!t) {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ gsize start = relative_pos_start(luaL_optinteger(L, 2, 1), t->len);
+ gsize end = relative_pos_end(luaL_optinteger(L, 3, start), t->len);
+ start--;
+
+ if (start >= end) {
+ return 0;
+ }
+
+ for (gsize i = start; i < end; i++) {
+ lua_pushinteger(L, t->start[i]);
+ }
+ return end - start;
+}
+
+static gint
+lua_text_memchr(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ int c;
+ bool reverse = false;
+
+ if (lua_isnumber(L, 2)) {
+ c = lua_tonumber(L, 2);
+ }
+ else {
+ gsize l;
+ const gchar *str = lua_tolstring(L, 2, &l);
+
+ if (str) {
+ c = str[0];
+
+ if (l != 1) {
+ return luaL_error(L, "need exactly one character to search");
+ }
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+ }
+
+ if (t) {
+ void *f;
+
+ if (lua_isboolean(L, 3)) {
+ reverse = lua_toboolean(L, 3);
+ }
+
+ if (reverse) {
+ f = rspamd_memrchr(t->start, c, t->len);
+ }
+ else {
+ f = memchr(t->start, c, t->len);
+ }
+
+ if (f) {
+ lua_pushinteger(L, ((const char *) f) - t->start + 1);
+ }
+ else {
+ lua_pushinteger(L, -1);
+ }
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_bytes(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+
+ if (t) {
+ lua_createtable(L, t->len, 0);
+
+ for (gsize i = 0; i < t->len; i++) {
+ lua_pushinteger(L, (guchar) t->start[i]);
+ lua_rawseti(L, -2, i + 1);
+ }
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_save_in_file(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ const gchar *fname = NULL;
+ guint mode = 00644;
+ gint fd = -1;
+ gboolean need_close = FALSE;
+
+ if (t != NULL) {
+ if (lua_type(L, 2) == LUA_TSTRING) {
+ fname = luaL_checkstring(L, 2);
+
+ if (lua_type(L, 3) == LUA_TNUMBER) {
+ mode = lua_tointeger(L, 3);
+ }
+ }
+ else if (lua_type(L, 2) == LUA_TNUMBER) {
+ /* Created fd */
+ fd = lua_tointeger(L, 2);
+ }
+
+ if (fd == -1) {
+ if (fname) {
+ fd = rspamd_file_xopen(fname, O_CREAT | O_WRONLY | O_EXCL, mode, 0);
+
+ if (fd == -1) {
+ lua_pushboolean(L, false);
+ lua_pushstring(L, strerror(errno));
+
+ return 2;
+ }
+ need_close = TRUE;
+ }
+ else {
+ fd = STDOUT_FILENO;
+ }
+ }
+
+ if (write(fd, t->start, t->len) == -1) {
+ if (fd != STDOUT_FILENO) {
+ close(fd);
+ }
+
+ lua_pushboolean(L, false);
+ lua_pushstring(L, strerror(errno));
+
+ return 2;
+ }
+
+ if (need_close) {
+ close(fd);
+ }
+
+ lua_pushboolean(L, true);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_gc(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+
+ if (t != NULL) {
+ g_assert(!(t->flags & RSPAMD_TEXT_FLAG_FAKE));
+
+ if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
+ if (t->flags & RSPAMD_TEXT_FLAG_WIPE) {
+ rspamd_explicit_memzero((guchar *) t->start, t->len);
+ }
+
+ if (t->flags & RSPAMD_TEXT_FLAG_MMAPED) {
+ munmap((gpointer) t->start, t->len);
+ }
+ else {
+ if (t->flags & RSPAMD_TEXT_FLAG_SYSMALLOC) {
+ free((gpointer) t->start);
+ }
+ else {
+ g_free((gpointer) t->start);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+static gint
+lua_text_eq(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
+ *t2 = lua_check_text_or_string(L, 2);
+
+ if (t1->len == t2->len) {
+ lua_pushboolean(L, memcmp(t1->start, t2->start, t1->len) == 0);
+ }
+ else {
+ lua_pushboolean(L, false);
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_lt(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
+ *t2 = lua_check_text_or_string(L, 2);
+
+ if (t1 && t2) {
+ if (t1->len == t2->len) {
+ lua_pushboolean(L, memcmp(t1->start, t2->start, t1->len) < 0);
+ }
+ else {
+ lua_pushboolean(L, t1->len < t2->len);
+ }
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_concat(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t1 = lua_check_text_or_string(L, 1),
+ *t2 = lua_check_text_or_string(L, 2);
+
+ if (t1 && t2) {
+ struct rspamd_lua_text *final;
+
+ final = lua_new_text(L, NULL, t1->len + t2->len, TRUE);
+ memcpy((void *) final->start, t1->start, t1->len);
+ memcpy((void *) (final->start + t1->len), t2->start, t2->len);
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_wipe(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+
+ if (t != NULL) {
+ if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
+ rspamd_explicit_memzero((guchar *) t->start, t->len);
+ }
+ else {
+ return luaL_error(L, "cannot wipe not owned text");
+ }
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 0;
+}
+
+static gint
+lua_text_base32(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
+ enum rspamd_base32_type btype = RSPAMD_BASE32_DEFAULT;
+
+ if (t != NULL) {
+ if (lua_type(L, 2) == LUA_TSTRING) {
+ btype = rspamd_base32_decode_type_from_str(lua_tostring(L, 2));
+
+ if (btype == RSPAMD_BASE32_INVALID) {
+ return luaL_error(L, "invalid b32 type: %s", lua_tostring(L, 2));
+ }
+ }
+
+ out = lua_new_text(L, NULL, t->len * 8 / 5 + 2, TRUE);
+ out->len = rspamd_encode_base32_buf(t->start, t->len, (gchar *) out->start,
+ out->len, btype);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_base64(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
+ gsize line_len = 0;
+ gboolean fold = FALSE;
+
+ if (t != NULL) {
+ if (lua_type(L, 2) == LUA_TNUMBER) {
+ line_len = lua_tointeger(L, 2);
+
+ if (line_len <= 8) {
+ return luaL_error(L, "too small line length (at least 8 is required)");
+ }
+ }
+
+ enum rspamd_newlines_type how = RSPAMD_TASK_NEWLINES_CRLF;
+
+ if (lua_type(L, 3) == LUA_TSTRING) {
+ const gchar *how_str = lua_tostring(L, 3);
+
+ if (g_ascii_strcasecmp(how_str, "cr") == 0) {
+ how = RSPAMD_TASK_NEWLINES_CR;
+ }
+ else if (g_ascii_strcasecmp(how_str, "lf") == 0) {
+ how = RSPAMD_TASK_NEWLINES_LF;
+ }
+ else if (g_ascii_strcasecmp(how_str, "crlf") != 0) {
+ return luaL_error(L, "invalid newline style: %s", how_str);
+ }
+ }
+
+ if (lua_type(L, 4) == LUA_TBOOLEAN) {
+ fold = lua_toboolean(L, 4);
+ }
+
+ gsize sz_len;
+
+ out = lua_newuserdata(L, sizeof(*t));
+ out->flags = RSPAMD_TEXT_FLAG_OWN;
+ out->start = rspamd_encode_base64_common(t->start, t->len,
+ line_len, &sz_len, fold, how);
+ out->len = sz_len;
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_hex(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1), *out;
+
+ if (t != NULL) {
+
+ out = lua_new_text(L, NULL, t->len * 2, TRUE);
+ out->len = rspamd_encode_hex_buf(t->start, t->len, (gchar *) out->start,
+ out->len);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_find(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ gsize patlen, init = 1;
+ const gchar *pat = luaL_checklstring(L, 2, &patlen);
+
+ if (t != NULL && pat != NULL) {
+
+ if (lua_isnumber(L, 3)) {
+ init = relative_pos_start(lua_tointeger(L, 3), t->len);
+ }
+
+ init--;
+
+ if (init > t->len) {
+ return luaL_error(L, "invalid arguments to find: init too large");
+ }
+
+ goffset pos = rspamd_substring_search(t->start + init,
+ t->len - init,
+ pat, patlen);
+
+ if (pos == -1) {
+ lua_pushnil(L);
+
+ return 1;
+ }
+
+ lua_pushinteger(L, pos + 1);
+ lua_pushinteger(L, pos + patlen);
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 2;
+}
+
+#define BITOP(a, b, op) \
+ ((a)[(guint64) (b) / (8u * sizeof *(a))] op(guint64) 1 << ((guint64) (b) % (8u * sizeof *(a))))
+
+static gint
+lua_text_exclude_chars(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ gssize patlen;
+ const gchar *pat = lua_tolstring(L, 2, &patlen), *p, *end;
+ gchar *dest, *d;
+ guint64 byteset[32 / sizeof(guint64)]; /* Bitset for ascii */
+ gboolean copy = TRUE;
+ guint *plen;
+
+ if (t != NULL && pat && patlen > 0) {
+ if (lua_isboolean(L, 3)) {
+ copy = lua_toboolean(L, 3);
+ }
+ else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
+ copy = FALSE;
+ }
+
+ if (!copy) {
+ dest = (gchar *) t->start;
+ plen = &t->len;
+ lua_pushvalue(L, 1); /* Push text as a result */
+ }
+ else {
+ /* We need to copy read only text */
+ struct rspamd_lua_text *nt;
+
+ dest = g_malloc(t->len);
+ nt = lua_newuserdata(L, sizeof(*nt));
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+ nt->len = t->len;
+ nt->flags = RSPAMD_TEXT_FLAG_OWN;
+ memcpy(dest, t->start, t->len);
+ nt->start = dest;
+ plen = &nt->len;
+ }
+
+ /* Fill pattern bitset */
+ memset(byteset, 0, sizeof byteset);
+
+ while (patlen > 0) {
+ if (*pat == '%') {
+ pat++;
+ patlen--;
+
+ if (patlen > 0) {
+ /*
+ * This stuff assumes little endian, but GUINT64_FROM_LE should
+ * deal with proper conversion
+ */
+ switch (*pat) {
+ case '%':
+ BITOP(byteset, *(guchar *) pat, |=);
+ break;
+ case 's':
+ /* "\r\n\t\f " */
+ byteset[0] |= GUINT64_FROM_LE(0x100003600LLU);
+ break;
+ case 'n':
+ /* newlines: "\r\n" */
+ byteset[0] |= GUINT64_FROM_LE(0x2400LLU);
+ break;
+ case '8':
+ /* 8 bit characters */
+ byteset[2] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
+ byteset[3] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
+ break;
+ case 'c':
+ /* Non printable (control) characters */
+ byteset[0] |= GUINT64_FROM_LE(0xffffffffLLU);
+ /* Del character */
+ byteset[1] |= GUINT64_FROM_LE(0x8000000000000000LLU);
+ break;
+ }
+ }
+ else {
+ /* Last '%' */
+ BITOP(byteset, (guchar) '%', |=);
+ }
+ }
+ else {
+ BITOP(byteset, *(guchar *) pat, |=);
+ }
+
+ pat++;
+ patlen--;
+ }
+ for (; patlen > 0 && BITOP(byteset, *(guchar *) pat, |=); pat++, patlen--)
+ ;
+
+ p = t->start;
+ end = t->start + t->len;
+ d = dest;
+
+ while (p < end) {
+ if (!BITOP(byteset, *(guchar *) p, &)) {
+ *d++ = *p;
+ }
+
+ p++;
+ }
+
+ *(plen) = d - dest;
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_oneline(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+ const gchar *p, *end;
+ gchar *dest, *d;
+ guint64 byteset[32 / sizeof(guint64)]; /* Bitset for ascii */
+ gboolean copy = TRUE, seen_8bit = FALSE;
+ guint *plen;
+
+ if (t != NULL) {
+ if (lua_isboolean(L, 2)) {
+ copy = lua_toboolean(L, 2);
+ }
+ else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
+ copy = FALSE;
+ }
+
+ if (!copy) {
+ dest = (gchar *) t->start;
+ plen = &t->len;
+ lua_pushvalue(L, 1); /* Push text as a result */
+ }
+ else {
+ /* We need to copy read only text */
+ struct rspamd_lua_text *nt;
+
+ dest = g_malloc(t->len);
+ nt = lua_newuserdata(L, sizeof(*nt));
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+ nt->len = t->len;
+ nt->flags = RSPAMD_TEXT_FLAG_OWN;
+ memcpy(dest, t->start, t->len);
+ nt->start = dest;
+ plen = &nt->len;
+ }
+
+ /* Fill pattern bitset */
+ memset(byteset, 0, sizeof byteset);
+ /* All spaces */
+ byteset[0] |= GUINT64_FROM_LE(0x100003600LLU);
+ /* Control characters */
+ byteset[0] |= GUINT64_FROM_LE(0xffffffffLLU);
+ /* Del character */
+ byteset[1] |= GUINT64_FROM_LE(0x8000000000000000LLU);
+ /* 8 bit characters */
+ byteset[2] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
+ byteset[3] |= GUINT64_FROM_LE(0xffffffffffffffffLLU);
+
+ p = t->start;
+ end = t->start + t->len;
+ d = dest;
+
+ while (p < end) {
+ if (!BITOP(byteset, *(guchar *) p, &)) {
+ *d++ = *p;
+ }
+ else {
+ if ((*(guchar *) p) & 0x80) {
+ seen_8bit = TRUE;
+ *d++ = *p;
+ }
+ else {
+ if (*p == ' ') {
+ if (d != dest) {
+ *d++ = *p++;
+ }
+
+ while (p < end && g_ascii_isspace(*p)) {
+ p++;
+ }
+
+ continue; /* To avoid p++ */
+ }
+ else if (*p == '\r' || *p == '\n') {
+ if (d != dest) {
+ *d++ = ' ';
+ p++;
+ }
+
+ while (p < end && g_ascii_isspace(*p)) {
+ p++;
+ }
+
+ continue; /* To avoid p++ */
+ }
+ }
+ }
+
+ p++;
+ }
+
+ while (d > dest && g_ascii_isspace(*(d - 1))) {
+ d--;
+ }
+
+ if (seen_8bit) {
+ if (rspamd_fast_utf8_validate(dest, d - dest) != 0) {
+ /* Need to make it valid :( */
+ UChar32 uc;
+ goffset err_offset;
+ gsize remain = d - dest;
+ gchar *nd = dest;
+
+ while (remain > 0 && (err_offset = rspamd_fast_utf8_validate(nd, remain)) > 0) {
+ gint i = 0;
+
+ err_offset--; /* As it returns it 1 indexed */
+ nd += err_offset;
+ remain -= err_offset;
+
+ /* Each invalid character of input requires 3 bytes of output (+2 bytes) */
+ while (i < remain) {
+ gint old_pos = i;
+ U8_NEXT(nd, i, remain, uc);
+
+ if (uc < 0) {
+ nd[old_pos] = '?';
+ }
+ else {
+ break;
+ }
+ }
+
+ nd += i;
+ remain -= i;
+ }
+ }
+ }
+
+ *(plen) = d - dest;
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_lower(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1), *nt;
+ gboolean is_utf8 = FALSE, is_inplace = FALSE;
+
+ if (t != NULL) {
+ if (lua_isboolean(L, 2)) {
+ is_utf8 = lua_toboolean(L, 2);
+ }
+ if (lua_isboolean(L, 3)) {
+ is_inplace = lua_toboolean(L, 3);
+ }
+
+ if (is_inplace) {
+ nt = t;
+ lua_pushvalue(L, 1);
+ }
+ else {
+ nt = lua_new_text(L, t->start, t->len, TRUE);
+ }
+
+ if (!is_utf8) {
+ rspamd_str_lc((gchar *) nt->start, nt->len);
+ }
+ else {
+ rspamd_str_lc_utf8((gchar *) nt->start, nt->len);
+ }
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+static gint
+lua_text_strtoul(lua_State *L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_lua_text *t = lua_check_text(L, 1);
+
+ if (t) {
+ unsigned long ll;
+
+ if (rspamd_strtoul(t->start, t->len, &ll)) {
+ lua_pushinteger(L, ll);
+ }
+ else {
+ lua_pushnil(L);
+ }
+ }
+ else {
+ return luaL_error(L, "invalid arguments");
+ }
+
+ return 1;
+}
+
+/* Used to distinguish lua text metatable */
+static const guint rspamd_lua_text_cookie = 0x2b21ef6fU;
+
+static gint
+lua_load_text(lua_State *L)
+{
+ lua_newtable(L);
+ lua_pushstring(L, "cookie");
+ lua_pushnumber(L, rspamd_lua_text_cookie);
+ lua_settable(L, -3);
+ luaL_register(L, NULL, textlib_f);
+
+ return 1;
+}
+
+void luaopen_text(lua_State *L)
+{
+ rspamd_lua_new_class(L, "rspamd{text}", textlib_m);
+ lua_pushstring(L, "cookie");
+ lua_pushnumber(L, rspamd_lua_text_cookie);
+ lua_settable(L, -3);
+ lua_pop(L, 1);
+
+ rspamd_lua_add_preload(L, "rspamd_text", lua_load_text);
+}