author:    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-10 21:30:40 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-10 21:30:40 +0000
commit:    133a45c109da5310add55824db21af5239951f93 (patch)
tree:      ba6ac4c0a950a0dda56451944315d66409923918 /src/lua/lua_tensor.c
parent:    Initial commit. (diff)
download:  rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz, rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/lua/lua_tensor.c')
-rw-r--r-- | src/lua/lua_tensor.c | 817
1 file changed, 817 insertions(+), 0 deletions(-)
diff --git a/src/lua/lua_tensor.c b/src/lua/lua_tensor.c
new file mode 100644
index 0000000..75e6139
--- /dev/null
+++ b/src/lua/lua_tensor.c
@@ -0,0 +1,817 @@
/*-
 * Copyright 2020 Vsevolod Stakhov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "lua_common.h"
#include "lua_tensor.h"
#include "contrib/kann/kautodiff.h"
#include "blas-config.h"

/***
 * @module rspamd_tensor
 * `rspamd_tensor` is a simple Lua library to abstract matrices and vectors.
 * Internally, they are represented as arrays of float variables.
 * So far, only 1D and 2D tensors are supported.
 */

LUA_FUNCTION_DEF(tensor, load);
LUA_FUNCTION_DEF(tensor, save);
LUA_FUNCTION_DEF(tensor, new);
LUA_FUNCTION_DEF(tensor, fromtable);
LUA_FUNCTION_DEF(tensor, destroy);
LUA_FUNCTION_DEF(tensor, mul);
LUA_FUNCTION_DEF(tensor, tostring);
LUA_FUNCTION_DEF(tensor, index);
LUA_FUNCTION_DEF(tensor, newindex);
LUA_FUNCTION_DEF(tensor, len);
LUA_FUNCTION_DEF(tensor, eigen);
LUA_FUNCTION_DEF(tensor, mean);
LUA_FUNCTION_DEF(tensor, transpose);
LUA_FUNCTION_DEF(tensor, has_blas);
LUA_FUNCTION_DEF(tensor, scatter_matrix);

static luaL_reg rspamd_tensor_f[] = {
    LUA_INTERFACE_DEF(tensor, load),
    LUA_INTERFACE_DEF(tensor, new),
    LUA_INTERFACE_DEF(tensor, fromtable),
    LUA_INTERFACE_DEF(tensor, has_blas),
    LUA_INTERFACE_DEF(tensor, scatter_matrix),
    {NULL, NULL},
};

static luaL_reg rspamd_tensor_m[] = {
    LUA_INTERFACE_DEF(tensor, save),
    {"__gc", lua_tensor_destroy},
    {"__mul", lua_tensor_mul},
    {"mul", lua_tensor_mul},
    {"tostring", lua_tensor_tostring},
    {"__tostring", lua_tensor_tostring},
    {"__index", lua_tensor_index},
    {"__newindex", lua_tensor_newindex},
    {"__len", lua_tensor_len},
    LUA_INTERFACE_DEF(tensor, eigen),
    LUA_INTERFACE_DEF(tensor, mean),
    LUA_INTERFACE_DEF(tensor, transpose),
    {NULL, NULL},
};

struct rspamd_lua_tensor *
lua_newtensor(lua_State *L, int ndims, const int *dim, bool zero_fill, bool own)
{
    struct rspamd_lua_tensor *res;

    res = lua_newuserdata(L, sizeof(struct rspamd_lua_tensor));
    memset(res, 0, sizeof(*res));

    res->ndims = ndims;
    res->size = 1;

    for (guint i = 0; i < ndims; i++) {
        res->size *= dim[i];
        res->dim[i] = dim[i];
    }

    /* To avoid allocating large stuff in Lua */
    if (own) {
        res->data = g_malloc(sizeof(rspamd_tensor_num_t) * res->size);

        if (zero_fill) {
            memset(res->data, 0, sizeof(rspamd_tensor_num_t) * res->size);
        }
    }
    else {
        /* Mark size negative to distinguish */
        res->size = -(res->size);
    }

    rspamd_lua_setclass(L, TENSOR_CLASS, -1);

    return res;
}
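A note on the ownership convention above: `lua_newtensor` negates `size` for non-owning views, and the `__gc` handler further down (`lua_tensor_destroy`) frees `data` only when `size > 0`. A minimal sketch of how C code might build on this, assuming the same headers as this file; `push_zero_matrix` is a hypothetical helper, not part of the rspamd sources:

```c
#include "lua_common.h"
#include "lua_tensor.h"

/* Hypothetical helper: push an owning, zero-filled nrows x ncols matrix
 * onto the Lua stack and return it. Because own == true, data is
 * g_malloc()ed, size stays positive, and lua_tensor_destroy will free it;
 * a row view created with own == false carries a negative size instead
 * and is never freed by the destructor. */
static struct rspamd_lua_tensor *
push_zero_matrix(lua_State *L, int nrows, int ncols)
{
    int dims[2] = {nrows, ncols};

    return lua_newtensor(L, 2, dims, true, true);
}
```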
/***
 * @function tensor.new(ndims, [dim1, ... dimN])
 * Creates a new zero-filled tensor with the specified number of dimensions
 * @return
 */
static gint
lua_tensor_new(lua_State *L)
{
    gint ndims = luaL_checkinteger(L, 1);

    if (ndims > 0 && ndims <= 2) {
        gint *dims = g_alloca(sizeof(gint) * ndims);

        for (guint i = 0; i < ndims; i++) {
            dims[i] = lua_tointeger(L, i + 2);
        }

        (void) lua_newtensor(L, ndims, dims, true, true);
    }
    else {
        return luaL_error(L, "incorrect dimensions number: %d", ndims);
    }

    return 1;
}

/***
 * @function tensor.fromtable(tbl)
 * Creates a new tensor from a Lua table: a flat array of numbers yields
 * a vector, an array of equally sized rows yields a matrix
 * @return
 */
static gint
lua_tensor_fromtable(lua_State *L)
{
    if (lua_istable(L, 1)) {
        lua_rawgeti(L, 1, 1);

        if (lua_isnumber(L, -1)) {
            lua_pop(L, 1);
            /* Input vector */
            gint dims[2];
            dims[0] = 1;
            dims[1] = rspamd_lua_table_size(L, 1);

            struct rspamd_lua_tensor *res = lua_newtensor(L, 2,
                                                          dims, false, true);

            for (guint i = 0; i < dims[1]; i++) {
                lua_rawgeti(L, 1, i + 1);
                res->data[i] = lua_tonumber(L, -1);
                lua_pop(L, 1);
            }
        }
        else if (lua_istable(L, -1)) {
            /* Input matrix */
            lua_pop(L, 1);

            /* Calculate the overall size */
            gint nrows = rspamd_lua_table_size(L, 1), ncols = 0;
            gint err;

            for (gint i = 0; i < nrows; i++) {
                lua_rawgeti(L, 1, i + 1);

                if (ncols == 0) {
                    ncols = rspamd_lua_table_size(L, -1);

                    if (ncols == 0) {
                        lua_pop(L, 1);
                        err = luaL_error(L, "invalid params at pos %d: "
                                            "bad input dimension %d",
                                         i,
                                         (int) ncols);

                        return err;
                    }
                }
                else {
                    if (ncols != rspamd_lua_table_size(L, -1)) {
                        gint t = rspamd_lua_table_size(L, -1);

                        lua_pop(L, 1);
                        err = luaL_error(L, "invalid params at pos %d: "
                                            "bad input dimension %d; %d expected",
                                         i,
                                         t,
                                         ncols);

                        return err;
                    }
                }

                lua_pop(L, 1);
            }

            gint dims[2];
            dims[0] = nrows;
            dims[1] = ncols;

            struct rspamd_lua_tensor *res = lua_newtensor(L, 2,
                                                          dims, false, true);

            for (gint i = 0; i < nrows; i++) {
                lua_rawgeti(L, 1, i + 1);

                for (gint j = 0; j < ncols; j++) {
                    lua_rawgeti(L, -1, j + 1);

                    res->data[i * ncols + j] = lua_tonumber(L, -1);

                    lua_pop(L, 1);
                }

                lua_pop(L, 1);
            }
        }
        else {
            lua_pop(L, 1);
            return luaL_error(L, "incorrect table");
        }
    }
    else {
        return luaL_error(L, "incorrect input");
    }

    return 1;
}


/***
 * @method tensor:destroy()
 * Tensor destructor
 * @return
 */
static gint
lua_tensor_destroy(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1);

    if (t) {
        if (t->size > 0) {
            g_free(t->data);
        }
    }

    return 0;
}

/***
 * @method tensor:save()
 * Tensor serialisation function
 * @return
 */
static gint
lua_tensor_save(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1);
    gint size;

    if (t) {
        if (t->size > 0) {
            size = t->size;
        }
        else {
            size = -(t->size);
        }

        gsize sz = sizeof(gint) * 4 + size * sizeof(rspamd_tensor_num_t);
        guchar *data;

        struct rspamd_lua_text *out = lua_new_text(L, NULL, 0, TRUE);

        data = g_malloc(sz);
        memcpy(data, &t->ndims, sizeof(int));
        memcpy(data + sizeof(int), &size, sizeof(int));
        memcpy(data + 2 * sizeof(int), t->dim, sizeof(int) * 2);
        memcpy(data + 4 * sizeof(int), t->data,
               size * sizeof(rspamd_tensor_num_t));

        out->start = (const gchar *) data;
        out->len = sz;
    }
    else {
        return luaL_error(L, "invalid arguments");
    }

    return 1;
}
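The byte layout produced by `save()` is four native-endian `int`s followed by the raw element array; `tensor.load()` further down reverses it. A standalone reader sketch for that header, under the assumption (stated in the module doc) that `rspamd_tensor_num_t` is `float`; `tensor_parse_header` is a hypothetical name, not rspamd API:

```c
#include <stddef.h>
#include <string.h>

/* Sketch of a parser for the save() format above:
 * [int ndims][int nelts][int dim[2]][nelts * float data].
 * Returns 1 iff the buffer length matches the advertised element count,
 * mirroring the size checks in lua_tensor_load. */
static int
tensor_parse_header(const unsigned char *buf, size_t len,
                    int *ndims, int *nelts, int dims[2])
{
    if (len < 4 * sizeof(int)) {
        return 0; /* shorter than the fixed header */
    }

    memcpy(ndims, buf, sizeof(int));
    memcpy(nelts, buf + sizeof(int), sizeof(int));
    memcpy(dims, buf + 2 * sizeof(int), 2 * sizeof(int));

    return *nelts >= 0 &&
           len == 4 * sizeof(int) + (size_t) *nelts * sizeof(float);
}
```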
static gint
lua_tensor_tostring(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1);

    if (t) {
        GString *out = g_string_sized_new(128);

        if (t->ndims == 1) {
            /* Print as a vector */
            for (gint i = 0; i < t->dim[0]; i++) {
                rspamd_printf_gstring(out, "%.4f ", t->data[i]);
            }
            /* Trim last space */
            out->len--;
        }
        else {
            for (gint i = 0; i < t->dim[0]; i++) {
                for (gint j = 0; j < t->dim[1]; j++) {
                    rspamd_printf_gstring(out, "%.4f ",
                                          t->data[i * t->dim[1] + j]);
                }
                /* Trim last space */
                out->len--;
                rspamd_printf_gstring(out, "\n");
            }
            /* Trim last newline */
            out->len--;
        }

        lua_pushlstring(L, out->str, out->len);

        g_string_free(out, TRUE);
    }
    else {
        return luaL_error(L, "invalid arguments");
    }

    return 1;
}

static gint
lua_tensor_index(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1);
    gint idx;

    if (t) {
        if (lua_isnumber(L, 2)) {
            idx = lua_tointeger(L, 2);

            if (t->ndims == 1) {
                /* Individual element; Lua indices are 1-based */
                if (idx >= 1 && idx <= t->dim[0]) {
                    lua_pushnumber(L, t->data[idx - 1]);
                }
                else {
                    lua_pushnil(L);
                }
            }
            else {
                /* Push row */
                gint dim = t->dim[1];

                if (idx >= 1 && idx <= t->dim[0]) {
                    /* Non-owning tensor */
                    struct rspamd_lua_tensor *res =
                        lua_newtensor(L, 1, &dim, false, false);
                    res->data = &t->data[(idx - 1) * t->dim[1]];
                }
                else {
                    lua_pushnil(L);
                }
            }
        }
        else if (lua_isstring(L, 2)) {
            /* Access to methods */
            lua_getmetatable(L, 1);
            lua_pushvalue(L, 2);
            lua_rawget(L, -2);
        }
        else {
            /* Unsupported key type */
            lua_pushnil(L);
        }
    }

    return 1;
}

static gint
lua_tensor_newindex(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1);
    gint idx;

    if (t) {
        if (lua_isnumber(L, 2)) {
            idx = lua_tointeger(L, 2);

            if (t->ndims == 1) {
                /* Individual element */
                if (idx <= t->dim[0] && idx > 0) {
                    rspamd_tensor_num_t value = lua_tonumber(L, 3), old;

                    old = t->data[idx - 1];
                    t->data[idx - 1] = value;
                    lua_pushnumber(L, old);
                }
                else {
                    return luaL_error(L, "invalid index: %d", idx);
                }
            }
            else {
                if (lua_isnumber(L, 3)) {
                    return luaL_error(L, "cannot assign number to a row");
                }
                else if (lua_isuserdata(L, 3)) {
                    /* Tensor assignment */
                    struct rspamd_lua_tensor *row = lua_check_tensor(L, 3);

                    if (row) {
                        if (row->ndims == 1) {
                            if (row->dim[0] == t->dim[1]) {
                                if (idx > 0 && idx <= t->dim[0]) {
                                    idx--; /* Zero based index */
                                    memcpy(&t->data[idx * t->dim[1]],
                                           row->data,
                                           t->dim[1] * sizeof(rspamd_tensor_num_t));

                                    return 0;
                                }
                                else {
                                    return luaL_error(L, "invalid index: %d", idx);
                                }
                            }
                        }
                        else {
                            return luaL_error(L, "cannot assign matrix to row");
                        }
                    }
                    else {
                        return luaL_error(L, "cannot assign row, invalid tensor");
                    }
                }
                else {
                    /* TODO: add table assignment */
                    return luaL_error(L, "cannot assign row, not a tensor");
                }
            }
        }
        else {
            /* Access to methods? NYI */
            return luaL_error(L, "cannot assign method of a tensor");
        }
    }

    return 1;
}
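Both metamethods above translate 1-based Lua indices into flat row-major offsets: row `i` of a matrix starts at `(i - 1) * dim[1]`. That address arithmetic as a tiny standalone sketch (`tensor_row_ptr` is a hypothetical helper, with `float` assumed as the element type):

```c
#include <stddef.h>

/* Return a pointer to 1-based row lua_idx of an nrows x ncols row-major
 * matrix, or NULL when the index is out of range (the __index handler
 * above pushes nil in that case). */
static float *
tensor_row_ptr(float *data, int nrows, int ncols, int lua_idx)
{
    if (lua_idx < 1 || lua_idx > nrows) {
        return NULL;
    }

    return data + (size_t) (lua_idx - 1) * ncols;
}
```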
/***
 * @method tensor:mul(other, [transA, [transB]])
 * Multiply two tensors (optionally transposed) and return a new tensor
 * @return
 */
static gint
lua_tensor_mul(lua_State *L)
{
    struct rspamd_lua_tensor *t1 = lua_check_tensor(L, 1),
                             *t2 = lua_check_tensor(L, 2), *res;
    int transA = 0, transB = 0;

    if (lua_isboolean(L, 3)) {
        transA = lua_toboolean(L, 3);
    }

    if (lua_isboolean(L, 4)) {
        transB = lua_toboolean(L, 4);
    }

    if (t1 && t2) {
        gint dims[2], shadow_dims[2];
        dims[0] = abs(transA ? t1->dim[1] : t1->dim[0]);
        shadow_dims[0] = abs(transB ? t2->dim[1] : t2->dim[0]);
        dims[1] = abs(transB ? t2->dim[0] : t2->dim[1]);
        shadow_dims[1] = abs(transA ? t1->dim[0] : t1->dim[1]);

        if (shadow_dims[0] != shadow_dims[1]) {
            return luaL_error(L, "incompatible dimensions %d x %d * %d x %d",
                              dims[0], shadow_dims[1], shadow_dims[0], dims[1]);
        }
        else if (shadow_dims[0] == 0) {
            /* Row * Column -> matrix */
            shadow_dims[0] = 1;
            shadow_dims[1] = 1;
        }

        if (dims[0] == 0) {
            /* Column */
            dims[0] = 1;

            if (dims[1] == 0) {
                /* Column * row -> number */
                dims[1] = 1;
            }
            res = lua_newtensor(L, 2, dims, true, true);
        }
        else if (dims[1] == 0) {
            /* Row */
            res = lua_newtensor(L, 1, dims, true, true);
            dims[1] = 1;
        }
        else {
            res = lua_newtensor(L, 2, dims, true, true);
        }

        kad_sgemm_simple(transA, transB, dims[0], dims[1], shadow_dims[0],
                         t1->data, t2->data, res->data);
    }
    else {
        return luaL_error(L, "invalid arguments");
    }

    return 1;
}

/***
 * @function tensor.load(rspamd_text)
 * Deserialize tensor
 * @return
 */
static gint
lua_tensor_load(lua_State *L)
{
    const guchar *data;
    gsize sz;

    if (lua_type(L, 1) == LUA_TUSERDATA) {
        struct rspamd_lua_text *t = lua_check_text(L, 1);

        if (!t) {
            return luaL_error(L, "invalid argument");
        }

        data = (const guchar *) t->start;
        sz = t->len;
    }
    else {
        data = (const guchar *) lua_tolstring(L, 1, &sz);
    }

    if (sz >= sizeof(gint) * 4) {
        int ndims, nelts, dims[2];

        memcpy(&ndims, data, sizeof(int));
        memcpy(&nelts, data + sizeof(int), sizeof(int));
        memcpy(dims, data + sizeof(int) * 2, sizeof(int) * 2);

        if (sz == nelts * sizeof(rspamd_tensor_num_t) + sizeof(int) * 4) {
            if (ndims == 1) {
                if (nelts == dims[0]) {
                    struct rspamd_lua_tensor *t = lua_newtensor(L, ndims, dims, false, true);
                    memcpy(t->data, data + sizeof(int) * 4, nelts * sizeof(rspamd_tensor_num_t));
                }
                else {
                    return luaL_error(L, "invalid argument: bad dims: %d x %d != %d",
                                      dims[0], 1, nelts);
                }
            }
            else if (ndims == 2) {
                if (nelts == dims[0] * dims[1]) {
                    struct rspamd_lua_tensor *t = lua_newtensor(L, ndims, dims, false, true);
                    memcpy(t->data, data + sizeof(int) * 4, nelts * sizeof(rspamd_tensor_num_t));
                }
                else {
                    return luaL_error(L, "invalid argument: bad dims: %d x %d != %d",
                                      dims[0], dims[1], nelts);
                }
            }
            else {
                return luaL_error(L, "invalid argument: bad ndims: %d", ndims);
            }
        }
        else {
            return luaL_error(L, "invalid size: %d, %d required, %d elts", (int) sz,
                              (int) (nelts * sizeof(rspamd_tensor_num_t) + sizeof(int) * 4),
                              nelts);
        }
    }
    else {
        return luaL_error(L, "invalid arguments; sz = %d", (int) sz);
    }

    return 1;
}

static gint
lua_tensor_len(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1);
    gint nret = 1;

    if (t) {
        /* Return the main dimension first */
        if (t->ndims == 1) {
            lua_pushinteger(L, t->dim[0]);
        }
        else {
            lua_pushinteger(L, t->dim[0]);
            lua_pushinteger(L, t->dim[1]);
            nret = 2;
        }
    }
    else {
        return luaL_error(L, "invalid arguments");
    }

    return nret;
}
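For reference, `mul()` above hands `kad_sgemm_simple` the row-major dimensions M = `dims[0]`, N = `dims[1]`, K = `shadow_dims[0]`. A naive reference loop for the semantics the caller relies on, assuming (as the zero-filled result tensors suggest) that the product is accumulated into C; this is a sketch, not kann's implementation:

```c
/* Reference for C (MxN) += op(A) (MxK) * op(B) (KxN), row-major storage,
 * where op() transposes when the corresponding flag is set. */
static void
sgemm_ref(int transA, int transB, int M, int N, int K,
          const float *A, const float *B, float *C)
{
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < N; j++) {
            float acc = 0.0f;

            for (int k = 0; k < K; k++) {
                /* transposed A is physically K x M, plain A is M x K */
                float a = transA ? A[k * M + i] : A[i * K + k];
                /* transposed B is physically N x K, plain B is K x N */
                float b = transB ? B[j * K + k] : B[k * N + j];

                acc += a * b;
            }

            C[i * N + j] += acc;
        }
    }
}
```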
static gint
lua_tensor_eigen(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1), *eigen;

    if (t) {
        if (t->ndims != 2 || t->dim[0] != t->dim[1]) {
            return luaL_error(L, "expected square matrix NxN but got %dx%d",
                              t->dim[0], t->dim[1]);
        }

        eigen = lua_newtensor(L, 1, &t->dim[0], true, true);

        if (!kad_ssyev_simple(t->dim[0], t->data, eigen->data)) {
            lua_pop(L, 1);
            return luaL_error(L, "kad_ssyev_simple failed (no blas?)");
        }
    }
    else {
        return luaL_error(L, "invalid arguments");
    }

    return 1;
}

static inline rspamd_tensor_num_t
mean_vec(rspamd_tensor_num_t *x, gsize n)
{
    float sum = rspamd_sum_floats(x, &n);
    return sum / (rspamd_tensor_num_t) n;
}

static gint
lua_tensor_mean(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1);

    if (t) {
        if (t->ndims == 1) {
            /* Mean of all elements in a vector */
            lua_pushnumber(L, mean_vec(t->data, t->dim[0]));
        }
        else {
            /* Row-wise mean vector output */
            struct rspamd_lua_tensor *res;

            res = lua_newtensor(L, 1, &t->dim[0], false, true);

            for (int i = 0; i < t->dim[0]; i++) {
                res->data[i] = mean_vec(&t->data[i * t->dim[1]], t->dim[1]);
            }
        }
    }
    else {
        return luaL_error(L, "invalid arguments");
    }

    return 1;
}

static gint
lua_tensor_transpose(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1), *res;
    int dims[2];

    if (t) {
        if (t->ndims == 1) {
            /* Row to column */
            dims[0] = 1;
            dims[1] = t->dim[0];
            res = lua_newtensor(L, 2, dims, false, true);
            memcpy(res->data, t->data, t->dim[0] * sizeof(rspamd_tensor_num_t));
        }
        else {
            /* Cache-friendly blocked algorithm */
            dims[0] = t->dim[1];
            dims[1] = t->dim[0];
            res = lua_newtensor(L, 2, dims, false, true);

            static const int block = 32;

            for (int i = 0; i < t->dim[0]; i += block) {
                for (int j = 0; j < t->dim[1]; ++j) {
                    for (int boff = 0; boff < block && i + boff < t->dim[0]; ++boff) {
                        res->data[j * t->dim[0] + i + boff] =
                            t->data[(i + boff) * t->dim[1] + j];
                    }
                }
            }
        }
    }
    else {
        return luaL_error(L, "invalid arguments");
    }

    return 1;
}

static gint
lua_tensor_has_blas(lua_State *L)
{
#ifdef HAVE_CBLAS
    lua_pushboolean(L, true);
#else
    lua_pushboolean(L, false);
#endif

    return 1;
}
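`scatter_matrix()` below first computes per-column means with Kahan-compensated summation: `means` accumulates the running sums and `tmp_row` temporarily holds the compensation terms. The same step as a standalone sketch, again assuming `float` elements; `column_means` is a hypothetical name:

```c
#include <stdlib.h>
#include <string.h>

/* Kahan-compensated column means of a row-major nrows x ncols matrix,
 * mirroring the first loop of lua_tensor_scatter_matrix. */
static void
column_means(const float *m, int nrows, int ncols, float *means)
{
    float *comp = calloc(ncols, sizeof(float)); /* compensation terms */

    memset(means, 0, ncols * sizeof(float));

    for (int i = 0; i < nrows; i++) {
        for (int j = 0; j < ncols; j++) {
            float y = m[i * ncols + j] - comp[j];
            float t = means[j] + y;

            comp[j] = (t - means[j]) - y; /* recovered low-order error */
            means[j] = t;
        }
    }

    for (int j = 0; j < ncols; j++) {
        means[j] /= nrows;
    }

    free(comp);
}
```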
static gint
lua_tensor_scatter_matrix(lua_State *L)
{
    struct rspamd_lua_tensor *t = lua_check_tensor(L, 1), *res;
    int dims[2];

    if (t) {
        if (t->ndims != 2) {
            return luaL_error(L, "matrix required");
        }

        /* Result: dim[1] x dim[1] scatter matrix */
        dims[0] = t->dim[1];
        dims[1] = t->dim[1];
        res = lua_newtensor(L, 2, dims, true, true);

        /* Auxiliary vars */
        rspamd_tensor_num_t *means, /* means vector */
            *tmp_row,               /* temp row for Kahan's algorithm */
            *tmp_square;            /* temp matrix for multiplications */
        means = g_malloc0(sizeof(rspamd_tensor_num_t) * t->dim[1]);
        tmp_row = g_malloc0(sizeof(rspamd_tensor_num_t) * t->dim[1]);
        tmp_square = g_malloc(sizeof(rspamd_tensor_num_t) * t->dim[1] * t->dim[1]);

        /*
         * Column-based means using Kahan summation:
         * means[] accumulates the sum (s), tmp_row[] the compensation (c)
         */
        for (int i = 0; i < t->dim[0]; i++) {
            /* Cycle by rows */
            for (int j = 0; j < t->dim[1]; j++) {
                rspamd_tensor_num_t v = t->data[i * t->dim[1] + j];
                rspamd_tensor_num_t y = v - tmp_row[j];
                rspamd_tensor_num_t st = means[j] + y;
                tmp_row[j] = (st - means[j]) - y;
                means[j] = st;
            }
        }

        for (int j = 0; j < t->dim[1]; j++) {
            means[j] /= t->dim[0];
        }

        for (int i = 0; i < t->dim[0]; i++) {
            /* Update for each sample */
            for (int j = 0; j < t->dim[1]; j++) {
                tmp_row[j] = t->data[i * t->dim[1] + j] - means[j];
            }

            memset(tmp_square, 0, t->dim[1] * t->dim[1] * sizeof(rspamd_tensor_num_t));
            kad_sgemm_simple(1, 0, t->dim[1], t->dim[1], 1,
                             tmp_row, tmp_row, tmp_square);

            for (int j = 0; j < t->dim[1]; j++) {
                kad_saxpy(t->dim[1], 1.0, &tmp_square[j * t->dim[1]],
                          &res->data[j * t->dim[1]]);
            }
        }

        g_free(tmp_row);
        g_free(means);
        g_free(tmp_square);
    }
    else {
        return luaL_error(L, "tensor required");
    }

    return 1;
}

static gint
lua_load_tensor(lua_State *L)
{
    lua_newtable(L);
    luaL_register(L, NULL, rspamd_tensor_f);

    return 1;
}


void luaopen_tensor(lua_State *L)
{
    /* Metatables */
    rspamd_lua_new_class(L, TENSOR_CLASS, rspamd_tensor_m);
    lua_pop(L, 1); /* No need to keep the metatable on the stack */
    rspamd_lua_add_preload(L, "rspamd_tensor", lua_load_tensor);
    lua_settop(L, 0);
}

struct rspamd_lua_tensor *
lua_check_tensor(lua_State *L, int pos)
{
    void *ud = rspamd_lua_check_udata(L, pos, TENSOR_CLASS);
    luaL_argcheck(L, ud != NULL, pos, "'tensor' expected");
    return ud ? ((struct rspamd_lua_tensor *) ud) : NULL;
}
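Finally, a hypothetical host-side sketch showing how the preload registered by `luaopen_tensor` might be exercised; it assumes linking against rspamd's Lua support so the symbols above resolve, and is not part of this commit:

```c
#include <lua.h>
#include <lauxlib.h>
#include <lualib.h>

void luaopen_tensor(lua_State *L); /* from lua_tensor.c above */

int
main(void)
{
    lua_State *L = luaL_newstate();

    luaL_openlibs(L);
    luaopen_tensor(L); /* registers the "rspamd_tensor" preload */

    /* fromtable/transpose and the __mul/__tostring metamethods as
     * implemented above; tensor indices are 1-based */
    luaL_dostring(L,
                  "local T = require 'rspamd_tensor'\n"
                  "local m = T.fromtable{{1, 2}, {3, 4}}\n"
                  "print(m * m:transpose())");

    lua_close(L);
    return 0;
}
```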