summaryrefslogtreecommitdiffstats
path: root/src/libmime/received.cxx
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
commit133a45c109da5310add55824db21af5239951f93 (patch)
treeba6ac4c0a950a0dda56451944315d66409923918 /src/libmime/received.cxx
parentInitial commit. (diff)
downloadrspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libmime/received.cxx')
-rw-r--r--src/libmime/received.cxx1017
1 files changed, 1017 insertions, 0 deletions
diff --git a/src/libmime/received.cxx b/src/libmime/received.cxx
new file mode 100644
index 0000000..dc16d9b
--- /dev/null
+++ b/src/libmime/received.cxx
@@ -0,0 +1,1017 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libserver/url.h"
+#include "lua/lua_common.h"
+#include "libserver/cfg_file.h"
+#include "libserver/mempool_vars_internal.h"
+#include "mime_string.hxx"
+#include "smtp_parsers.h"
+#include "message.h"
+#include "received.hxx"
+#include "frozen/string.h"
+#include "frozen/unordered_map.h"
+
+namespace rspamd::mime {
+
+/**
+ * Kind of clause found inside a single Received header value.
+ * The named kinds correspond to the keywords matched by received_spill.
+ */
+enum class received_part_type {
+ RSPAMD_RECEIVED_PART_FROM, /* `from` clause */
+ RSPAMD_RECEIVED_PART_BY, /* `by` clause */
+ RSPAMD_RECEIVED_PART_FOR, /* `for` clause */
+ RSPAMD_RECEIVED_PART_WITH, /* `with` clause */
+ RSPAMD_RECEIVED_PART_ID, /* `id` clause */
+ RSPAMD_RECEIVED_PART_UNKNOWN, /* any token that matches none of the keywords above */
+};
+
+/**
+ * One parsed clause of a Received header: its kind, the plain data that
+ * followed the keyword and the parenthesised comments seen alongside it.
+ */
+struct received_part {
+ received_part_type type; /* which clause this part represents */
+ mime_string data; /* non-comment text of the clause */
+ std::vector<mime_string> comments; /* contents of each `(...)` group, without the outer parentheses */
+
+ /* Creates an empty part of kind `t`; `data` is built with received_char_filter */
+ explicit received_part(received_part_type t)
+ : type(t),
+ data(received_char_filter)
+ {
+ }
+};
+
+/**
+ * Append `len` characters starting at `begin` to `dest` and then strip
+ * spaces and tabs from it. A zero length leaves `dest` untouched.
+ */
+static inline auto
+received_part_set_or_append(const gchar *begin,
+							gsize len,
+							mime_string &dest) -> void
+{
+	if (len != 0) {
+		dest.append(begin, len);
+		dest.trim(" \t");
+	}
+}
+
+/**
+ * Parse the body of a single Received clause (the text that follows a
+ * keyword such as `from` or `by`), separating plain data from parenthesised
+ * comments.
+ *
+ * @param data text to scan, starting right after the clause keyword
+ * @param type clause kind; for RSPAMD_RECEIVED_PART_UNKNOWN nothing is stored in npart
+ * @param last on success receives the number of characters of `data` consumed
+ * @param npart destination for the extracted data and comments
+ * @return true if something was consumed and `last` was set, false otherwise
+ */
+static auto
+received_process_part(const std::string_view &data,
+ received_part_type type,
+ std::ptrdiff_t &last,
+ received_part &npart) -> bool
+{
+ /* Counters of opening/closing parentheses while inside a comment */
+ auto obraces = 0, ebraces = 0;
+ auto seen_tcpinfo = false;
+ enum _parse_state {
+ skip_spaces, /* skip whitespace, then switch to next_state */
+ in_comment, /* inside `(...)`, nesting tracked by obraces/ebraces */
+ read_data, /* reading a plain data token */
+ read_tcpinfo, /* reading a `[...]` token up to the closing bracket */
+ all_done /* clause finished, report the consumed length */
+ } state,
+ next_state;
+
+ /* In this function, we just process comments and data separately */
+ const auto *p = data.data();
+ const auto *end = p + data.size();
+ /* `c` marks the start of the token currently being collected */
+ const auto *c = p;
+
+ state = skip_spaces;
+ next_state = read_data;
+
+ while (p < end) {
+ switch (state) {
+ case skip_spaces:
+ if (!g_ascii_isspace(*p)) {
+ c = p;
+ state = next_state;
+ }
+ else {
+ p++;
+ }
+ break;
+ case in_comment:
+ if (*p == '(') {
+ obraces++;
+ }
+ else if (*p == ')') {
+ ebraces++;
+
+ if (ebraces >= obraces) {
+ /* Outermost comment closed: store its contents (without the parentheses) */
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ if (p > c) {
+ npart.comments.emplace_back(received_char_filter);
+ auto &comment = npart.comments.back();
+ received_part_set_or_append(c, p - c,
+ comment);
+ }
+ }
+
+ p++;
+ c = p;
+ state = skip_spaces;
+ next_state = read_data;
+
+ continue;
+ }
+ }
+
+ p++;
+ break;
+ case read_data:
+ if (*p == '(') {
+ /* A comment starts: flush the data collected so far */
+ if (p > c) {
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ received_part_set_or_append(c, p - c,
+ npart.data);
+ }
+ }
+
+ state = in_comment;
+ obraces = 1;
+ ebraces = 0;
+ p++;
+ c = p;
+ }
+ else if (g_ascii_isspace(*p)) {
+ /* End of a token: flush it and skip the following whitespace */
+ if (p > c) {
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ received_part_set_or_append(c, p - c,
+ npart.data);
+ }
+ }
+
+ state = skip_spaces;
+ next_state = read_data;
+ c = p;
+ }
+ else if (*p == ';') {
+ /* It is actually delimiter of date part if not in the comments */
+ if (p > c) {
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ received_part_set_or_append(c, p - c,
+ npart.data);
+ }
+ }
+
+ state = all_done;
+ continue;
+ }
+ else if (npart.data.size() > 0) {
+ /* We have already received data and find something with no ( */
+ if (!seen_tcpinfo && type == received_part_type::RSPAMD_RECEIVED_PART_FROM) {
+ /* Check if we have something special here, such as TCPinfo */
+ if (*c == '[') {
+ state = read_tcpinfo;
+ p++;
+ }
+ else {
+ state = all_done;
+ continue;
+ }
+ }
+ else {
+ state = all_done;
+ continue;
+ }
+ }
+ else {
+ p++;
+ }
+ break;
+ case read_tcpinfo:
+ if (*p == ']') {
+ /* Append the whole bracketed token, closing bracket included */
+ received_part_set_or_append(c, p - c + 1,
+ npart.data);
+ seen_tcpinfo = TRUE;
+ state = skip_spaces;
+ next_state = read_data;
+ c = p;
+ }
+ p++;
+ break;
+ case all_done:
+ if (p > data.data()) {
+ last = p - data.data();
+ return true;
+ }
+ else {
+ /* Empty element */
+ return false;
+ }
+ break;
+ }
+ }
+
+ /* Leftover */
+ switch (state) {
+ case read_data:
+ if (p > c) {
+ if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+ received_part_set_or_append(c, p - c,
+ npart.data);
+ }
+
+ last = p - data.data();
+
+ return true;
+ }
+ break;
+ case skip_spaces:
+ if (p > data.data()) {
+ last = p - data.data();
+
+ return true;
+ }
+ /* No break here: control falls into `default`, which only breaks */
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/**
+ * Compare the first N characters of `in`, lowercased through lc_map,
+ * against the literal `lit`.
+ *
+ * The caller must guarantee that at least N characters are readable at `in`.
+ *
+ * @tparam N number of characters to compare
+ * @param lit literal to compare against, expected to be lowercase already
+ * @param in input characters
+ * @return true if all N characters match
+ */
+template<std::size_t N>
+constexpr auto lit_compare_lowercase(const char lit[N], const char *in) -> bool
+{
+	/* Index has the same type as N to avoid a signed/unsigned comparison */
+	for (std::size_t i = 0; i < N; i++) {
+		if (lc_map[static_cast<unsigned char>(in[i])] != lit[i]) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * Split a raw Received header value into its clauses.
+ *
+ * A leading parenthesised comment is skipped, then optional `from` and `by`
+ * clauses are read, followed by any number of `with`, `for`, `id` or
+ * unrecognised clauses until the `;` date separator or the end of input.
+ *
+ * @param in raw header value
+ * @param date_pos set to the offset just past `;` when the date separator is
+ *                 found by this function; left untouched otherwise
+ * @return the parsed clauses; empty if the input has neither `from` nor `by`,
+ *         if one of them is malformed, or if the input ends inside an
+ *         unrecognised token
+ */
+static auto
+received_spill(const std::string_view &in,
+			   std::ptrdiff_t &date_pos) -> std::vector<received_part>
+{
+	std::vector<received_part> parts;
+	std::ptrdiff_t pos = 0;
+	auto seen_from = false, seen_by = false;
+
+	const auto *p = in.data();
+	const auto *end = p + in.size();
+
+	auto skip_spaces = [&p, end]() {
+		while (p < end && g_ascii_isspace(*p)) {
+			p++;
+		}
+	};
+
+	skip_spaces();
+
+	/*
+	 * Skip SMTP comments.
+	 * The bounds check matters: for an empty or all-whitespace input p == end
+	 * here, and a string_view is not guaranteed to be NUL-terminated.
+	 */
+	if (p < end && *p == '(') {
+		auto obraces = 0, ebraces = 0;
+
+		while (p < end) {
+			if (*p == ')') {
+				ebraces++;
+			}
+			else if (*p == '(') {
+				obraces++;
+			}
+
+			p++;
+
+			if (obraces == ebraces) {
+				/* Skip spaces after */
+				skip_spaces();
+				break;
+			}
+		}
+	}
+
+	auto len = end - p;
+
+	if (len == 0) {
+		return parts;
+	}
+
+	/*
+	 * Try to parse a clause of kind `what` starting at p; on success the part
+	 * stays in `parts` and `pos` holds the number of characters consumed.
+	 */
+	auto maybe_process_part = [&](received_part_type what) -> bool {
+		parts.emplace_back(what);
+		auto &rcvd_part = parts.back();
+		auto chunk = std::string_view{p, (std::size_t)(end - p)};
+
+		if (!received_process_part(chunk, what, pos, rcvd_part)) {
+			parts.pop_back();
+
+			return false;
+		}
+
+		return true;
+	};
+
+	if (len > 4 && lit_compare_lowercase<4>("from", p)) {
+		p += sizeof("from") - 1;
+
+		/* We can now store from part */
+		if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FROM)) {
+			/* Do not accept malformed from */
+			return {};
+		}
+
+		g_assert(pos != 0);
+		p += pos;
+		len = end > p ? end - p : 0;
+		seen_from = true;
+	}
+
+	if (len > 2 && lit_compare_lowercase<2>("by", p)) {
+		p += sizeof("by") - 1;
+
+		if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_BY)) {
+			return {};
+		}
+
+		g_assert(pos != 0);
+		p += pos;
+		len = end > p ? end - p : 0;
+		seen_by = true;
+	}
+
+	if (!seen_from && !seen_by) {
+		/* Useless received */
+		return {};
+	}
+
+	while (p < end) {
+		bool got_part = false;
+		if (*p == ';') {
+			/* We are at the date separator, stop here */
+			date_pos = p - in.data() + 1;
+			break;
+		}
+		else {
+			/*
+			 * sizeof() of a literal counts the terminating NUL, so each check
+			 * requires the keyword plus at least two more characters.
+			 */
+			if (len > sizeof("with") && lit_compare_lowercase<4>("with", p)) {
+				p += sizeof("with") - 1;
+
+				got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_WITH);
+			}
+			else if (len > sizeof("for") && lit_compare_lowercase<3>("for", p)) {
+				p += sizeof("for") - 1;
+				got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FOR);
+			}
+			else if (len > sizeof("id") && lit_compare_lowercase<2>("id", p)) {
+				p += sizeof("id") - 1;
+				got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_ID);
+			}
+			else {
+				/* Unrecognised token: skip it up to a space, a comment or the date separator */
+				while (p < end) {
+					if (!(g_ascii_isspace(*p) || *p == '(' || *p == ';')) {
+						p++;
+					}
+					else {
+						break;
+					}
+				}
+
+				if (p == end) {
+					return {};
+				}
+				else if (*p == ';') {
+					date_pos = p - in.data() + 1;
+					break;
+				}
+				else {
+					got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN);
+				}
+			}
+
+			if (!got_part) {
+				/* Nothing was consumed: step over one character to guarantee progress */
+				p++;
+				len = end > p ? end - p : 0;
+			}
+			else {
+				g_assert(pos != 0);
+				p += pos;
+				len = end > p ? end - p : 0;
+			}
+		}
+	}
+
+	return parts;
+}
+
+/*
+ * Flags passed to rspamd_parse_inet_address_pool for every address taken
+ * from a Received header: RSPAMD_INET_ADDRESS_PARSE_REMOTE combined with
+ * RSPAMD_INET_ADDRESS_PARSE_NO_UNIX.
+ */
+#define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \
+ (rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE | RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)
+
+/**
+ * Extract a host identifier from `in` and copy it into `dest`.
+ *
+ * Two forms are accepted: an address wrapped in square brackets, which is
+ * stored in its textual form (the "pretty" form when it carries a port), or
+ * a run of domain characters that either spans the whole input or contains
+ * a dot and is followed by whitespace, `[` or `(`.
+ *
+ * @param pool memory pool used by the address parser
+ * @param in text to examine
+ * @param dest receives the address or hostname on success
+ * @return true if `dest` was filled, false otherwise
+ */
+static auto
+received_process_rdns(rspamd_mempool_t *pool,
+					  const std::string_view &in,
+					  mime_string &dest) -> bool
+{
+	if (in.empty()) {
+		return false;
+	}
+
+	const auto *cur = in.data();
+	const auto *const stop = cur + in.size();
+
+	if (in.size() > 2 && *cur == '[' && *(stop - 1) == ']') {
+		/* Bracketed address: parse what lies between the brackets */
+		auto *addr = rspamd_parse_inet_address_pool(cur + 1,
+													(stop - cur) - 2,
+													pool,
+													RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
+
+		if (addr) {
+			const gchar *addr_str = rspamd_inet_address_get_port(addr) != 0
+										? rspamd_inet_address_to_string_pretty(addr)
+										: rspamd_inet_address_to_string(addr);
+
+			dest.assign_copy(std::string_view{addr_str});
+
+			return true;
+		}
+	}
+
+	/* Count the leading characters that may belong to a hostname */
+	auto seen_dot = false;
+	auto hlen = 0u;
+
+	for (; cur < stop; cur++) {
+		if (g_ascii_isspace(*cur) || !rspamd_url_is_domain(*cur)) {
+			break;
+		}
+
+		if (*cur == '.') {
+			seen_dot = true;
+		}
+
+		hlen++;
+	}
+
+	if (hlen == 0) {
+		return false;
+	}
+
+	const auto consumed_all = (cur == stop);
+
+	if (consumed_all || (seen_dot && (g_ascii_isspace(*cur) || *cur == '[' || *cur == '('))) {
+		/* All data looks like a hostname */
+		dest.assign_copy(std::string_view{in.data(), hlen});
+
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * Extract the peer address, and the reverse DNS name where present, from a
+ * host description such as `[ip]`, `ip`, `rdns [ip]` or a bare hostname.
+ *
+ * A parsed address is stored in rh.addr and rh.real_ip; a hostname goes to
+ * rh.real_hostname.
+ *
+ * @param pool memory pool used by the address parser
+ * @param rh header record to fill
+ * @param in text to examine
+ * @return true only when a hostname was stored in rh.real_hostname; an
+ *         address parsed on its own does not make the result true
+ */
+static auto
+received_process_host_tcpinfo(rspamd_mempool_t *pool,
+							  received_header &rh,
+							  const std::string_view &in) -> bool
+{
+	if (in.empty()) {
+		return false;
+	}
+
+	/* Parse `candidate` as an address and record it in rh on success */
+	auto store_addr = [&](const std::string_view &candidate) -> bool {
+		auto *parsed = rspamd_parse_inet_address_pool(candidate.data(),
+													  candidate.size(),
+													  pool,
+													  RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
+
+		if (!parsed) {
+			return false;
+		}
+
+		rh.addr = parsed;
+		rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(parsed)));
+
+		return true;
+	};
+
+	if (in[0] == '[') {
+		/* Likely Exim version */
+		const auto close_pos = in.find(']');
+
+		if (close_pos != std::string_view::npos) {
+			store_addr(in.substr(1, close_pos - 1));
+		}
+
+		return false;
+	}
+
+	/* Try to parse IP address */
+	if (g_ascii_isxdigit(in[0]) && store_addr(in)) {
+		return false;
+	}
+
+	const auto open_pos = in.find('[');
+
+	if (open_pos == std::string_view::npos) {
+		/* Hostname or some crap, sigh... */
+		return received_process_rdns(pool, in, rh.real_hostname);
+	}
+
+	/* Try canonical Postfix version: rdns [ip] */
+	const auto close_pos = in.rfind(']');
+
+	if (close_pos == std::string_view::npos || close_pos <= open_pos) {
+		return false;
+	}
+
+	if (!store_addr(in.substr(open_pos + 1, close_pos - open_pos - 1))) {
+		return false;
+	}
+
+	/* Process with rDNS */
+	return received_process_rdns(pool, in.substr(0, open_pos), rh.real_hostname);
+}
+
+/**
+ * Fill the sender-related fields of `rh` from a parsed `from` clause.
+ *
+ * We have seen multiple cases:
+ * - [ip] (hostname/unknown [real_ip])
+ * - helo (hostname/unknown [real_ip])
+ * - [ip]
+ * - hostname
+ * - hostname ([ip]:port helo=xxx)
+ * Maybe more...
+ *
+ * @param pool memory pool used by the address parser
+ * @param rpart parsed `from` clause (data plus comments)
+ * @param rh header record to fill
+ */
+static void
+received_process_from(rspamd_mempool_t *pool,
+					  const received_part &rpart,
+					  received_header &rh)
+{
+	if (!rpart.comments.empty()) {
+		/* We can have info within comment as part of RFC */
+		received_process_host_tcpinfo(
+			pool, rh,
+			rpart.comments[0].as_view());
+	}
+
+	if (rpart.data.size() == 0) {
+		/* Only the comment could carry any information */
+		return;
+	}
+
+	if (rh.real_ip.size() == 0) {
+		/* The comment gave no address: try to do the same with data */
+		if (received_process_host_tcpinfo(
+				pool, rh,
+				rpart.data.as_view())) {
+			/* Data supplied the real hostname, nothing is left to announce */
+			return;
+		}
+
+		if (rh.real_ip.size() == 0) {
+			/*
+			 * Data yielded neither an address nor a hostname. Parsing the
+			 * same data again cannot produce a different result, so stop.
+			 */
+			return;
+		}
+	}
+
+	/* Get announced hostname (usually helo) */
+	received_process_rdns(pool,
+						  rpart.data.as_view(),
+						  rh.from_hostname);
+}
+
+/**
+ * Parse one Received header value and append the result to `chain`.
+ *
+ * @param chain chain that receives the new header record
+ * @param pool memory pool used by the address parser
+ * @param in raw header value
+ * @param hdr originating MIME header, stored in the record as is
+ * @return false if no clause could be extracted (nothing is appended),
+ *         true otherwise
+ */
+static auto
+received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool,
+					  const std::string_view &in,
+					  struct rspamd_mime_header *hdr) -> bool
+{
+	/* Protocol names accepted in the `with` clause and the flags they imply */
+	static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, received_flags>({
+		{"smtp", received_flags::SMTP},
+		{"esmtp", received_flags::ESMTP},
+		{"esmtpa", received_flags::ESMTPA |
+					   received_flags::AUTHENTICATED},
+		{"esmtpsa", received_flags::ESMTPSA |
+						received_flags::SSL |
+						received_flags::AUTHENTICATED},
+		{"esmtps", received_flags::ESMTPS |
+					   received_flags::SSL},
+		{"lmtp", received_flags::LMTP},
+		{"imap", received_flags::IMAP},
+		{"imaps", received_flags::IMAP |
+					  received_flags::SSL},
+		{"http", received_flags::HTTP},
+		{"https", received_flags::HTTP |
+					  received_flags::SSL},
+		{"local", received_flags::LOCAL},
+	});
+
+	std::ptrdiff_t date_pos = -1;
+	const auto parts = received_spill(in, date_pos);
+
+	if (parts.empty()) {
+		return false;
+	}
+
+	auto &rh = chain.new_received();
+
+	rh.flags = received_flags::UNKNOWN;
+	rh.hdr = hdr;
+
+	for (const auto &part: parts) {
+		if (part.type == received_part_type::RSPAMD_RECEIVED_PART_FROM) {
+			received_process_from(pool, part, rh);
+		}
+		else if (part.type == received_part_type::RSPAMD_RECEIVED_PART_BY) {
+			received_process_rdns(pool,
+								  part.data.as_view(),
+								  rh.by_hostname);
+		}
+		else if (part.type == received_part_type::RSPAMD_RECEIVED_PART_WITH) {
+			if (part.data.size() > 0) {
+				const auto known_proto = protos_map.find(part.data.as_view());
+
+				if (known_proto != protos_map.end()) {
+					rh.flags = known_proto->second;
+				}
+			}
+		}
+		else if (part.type == received_part_type::RSPAMD_RECEIVED_PART_FOR) {
+			rh.for_mbox.assign_copy(part.data);
+			rh.for_addr = rspamd_email_address_from_smtp(rh.for_mbox.data(),
+														 rh.for_mbox.size());
+		}
+		/* Other clause kinds carry nothing that is stored */
+	}
+
+	/* Without an announced name, fall back to the real hostname */
+	if (rh.from_hostname.empty() && !rh.real_hostname.empty()) {
+		rh.from_hostname.assign_copy(rh.real_hostname);
+	}
+
+	/* The date is whatever follows the `;` separator, if one was found */
+	if (date_pos > 0 && static_cast<std::size_t>(date_pos) < in.size()) {
+		const auto date_text = in.substr(date_pos);
+		rh.timestamp = rspamd_parse_smtp_date((const unsigned char *) date_text.data(),
+											  date_text.size(), nullptr);
+	}
+
+	return true;
+}
+
+/**
+ * Reconcile the topmost Received header with the connection data of a task.
+ *
+ * If the top header has no IP, received headers are configured to be
+ * ignored, or its address differs from task->from_addr, and the task has a
+ * known peer address, an artificial header built from the task data is
+ * prepended to the chain. Otherwise, when the task has no IP
+ * (RSPAMD_TASK_FLAG_NO_IP) and received headers are not ignored,
+ * task->from_addr and task->hostname are filled from the top header.
+ *
+ * @param task task whose received chain is inspected
+ * @return true if either of the two corrections was applied, false otherwise
+ */
+static auto
+received_maybe_fix_task(struct rspamd_task *task) -> bool
+{
+ auto *recv_chain_ptr = static_cast<received_header_chain *>(MESSAGE_FIELD(task, received_headers));
+
+ if (recv_chain_ptr) {
+ auto need_recv_correction = false;
+
+ auto top_recv_maybe = recv_chain_ptr->get_received(0);
+
+ if (top_recv_maybe.has_value()) {
+ auto &top_recv = top_recv_maybe.value().get();
+
+ const auto *raddr = top_recv.addr;
+ /* Decide whether the top header can be trusted to describe our peer */
+ if (top_recv.real_ip.size() == 0 || (task->cfg && task->cfg->ignore_received)) {
+ need_recv_correction = true;
+ }
+ else if (!(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr) {
+ if (!raddr) {
+ need_recv_correction = true;
+ }
+ else {
+ if (rspamd_inet_address_compare(raddr, task->from_addr, FALSE) != 0) {
+ need_recv_correction = true;
+ }
+ }
+ }
+
+ if (need_recv_correction && !(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr) {
+ msg_debug_task("the first received seems to be"
+ " not ours, prepend it with fake one");
+
+ /* Build the artificial header from what the task knows about the connection */
+ auto &trecv = recv_chain_ptr->new_received(received_header_chain::append_type::append_head);
+ trecv.flags |= received_flags::ARTIFICIAL;
+
+ if (task->flags & RSPAMD_TASK_FLAG_SSL) {
+ trecv.flags |= received_flags::SSL;
+ }
+
+ if (task->auth_user) {
+ trecv.flags |= received_flags::AUTHENTICATED;
+ }
+
+ trecv.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(task->from_addr)));
+
+ const auto *mta_name = (const char *) rspamd_mempool_get_variable(task->task_pool,
+ RSPAMD_MEMPOOL_MTA_NAME);
+
+ if (mta_name) {
+ trecv.by_hostname.assign_copy(std::string_view(mta_name));
+ }
+ trecv.addr = rspamd_inet_address_copy(task->from_addr,
+ task->task_pool);
+
+ if (task->hostname) {
+ trecv.real_hostname.assign_copy(std::string_view(task->hostname));
+ trecv.from_hostname.assign_copy(trecv.real_hostname);
+ }
+
+ return true;
+ }
+
+ /* Extract data from received header if we were not given IP */
+ if (!need_recv_correction && (task->flags & RSPAMD_TASK_FLAG_NO_IP) &&
+ (task->cfg && !task->cfg->ignore_received)) {
+ if (!top_recv.real_ip.empty()) {
+ if (!rspamd_parse_inet_address(&task->from_addr,
+ top_recv.real_ip.data(),
+ top_recv.real_ip.size(),
+ RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)) {
+ msg_warn_task("cannot get IP from received header: '%s'",
+ top_recv.real_ip.data());
+ task->from_addr = nullptr;
+ }
+ }
+ if (!top_recv.real_hostname.empty()) {
+ /* The task keeps a pointer into the header's own string, no copy is made */
+ task->hostname = top_recv.real_hostname.data();
+ }
+
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/**
+ * Push the received chain onto the Lua stack as an array of tables, one
+ * table per header, in chain order.
+ *
+ * @param chain chain to export; may be null
+ * @param L Lua state that receives the array
+ * @return false if `chain` is null (nothing is pushed), true otherwise
+ */
+static auto
+received_export_to_lua(received_header_chain *chain, lua_State *L) -> bool
+{
+	if (chain == nullptr) {
+		return false;
+	}
+
+	lua_createtable(L, chain->size(), 0);
+
+	/* Store a boolean field `name` telling whether `fl` is set in the header flags */
+	auto set_flag_field = [L](const received_header &rh, received_flags fl, const char *name) {
+		lua_pushboolean(L, !!(rh.flags & fl));
+		lua_setfield(L, -2, name);
+	};
+
+	/* Store `st` under `field`, or nil when the string is empty */
+	auto set_string_field = [L](const mime_string &st, const char *field) {
+		if (!st.empty()) {
+			lua_pushlstring(L, st.data(), st.size());
+		}
+		else {
+			lua_pushnil(L);
+		}
+		lua_setfield(L, -2, field);
+	};
+
+	auto lua_idx = 1;
+
+	for (const auto &rh: chain->as_vector()) {
+		lua_createtable(L, 0, 10);
+
+		if (rh.hdr && rh.hdr->decoded) {
+			rspamd_lua_table_set(L, "raw", rh.hdr->decoded);
+		}
+
+		/* Nested `flags` table */
+		lua_createtable(L, 0, 3);
+		set_flag_field(rh, received_flags::ARTIFICIAL, "artificial");
+		set_flag_field(rh, received_flags::AUTHENTICATED, "authenticated");
+		set_flag_field(rh, received_flags::SSL, "ssl");
+		lua_setfield(L, -2, "flags");
+
+		set_string_field(rh.from_hostname, "from_hostname");
+		set_string_field(rh.real_hostname, "real_hostname");
+		set_string_field(rh.real_ip, "from_ip");
+		set_string_field(rh.by_hostname, "by_hostname");
+		set_string_field(rh.for_mbox, "for");
+
+		if (rh.addr) {
+			rspamd_lua_ip_push(L, rh.addr);
+		}
+		else {
+			lua_pushnil(L);
+		}
+		lua_setfield(L, -2, "real_ip");
+
+		lua_pushstring(L, received_protocol_to_string(rh.flags));
+		lua_setfield(L, -2, "proto");
+
+		lua_pushinteger(L, rh.timestamp);
+		lua_setfield(L, -2, "timestamp");
+
+		/* Append the finished header table to the outer array */
+		lua_rawseti(L, -2, lua_idx);
+		lua_idx++;
+	}
+
+	return true;
+}
+
+}// namespace rspamd::mime
+
+/**
+ * C entry point: parse one Received header value into the task's chain,
+ * creating the chain on first use.
+ *
+ * @param task task that owns the chain
+ * @param data raw header value
+ * @param sz length of `data`
+ * @param hdr originating MIME header
+ * @return result of rspamd::mime::received_header_parse
+ */
+bool rspamd_received_header_parse(struct rspamd_task *task,
+								  const char *data, size_t sz,
+								  struct rspamd_mime_header *hdr)
+{
+	using chain_type = rspamd::mime::received_header_chain;
+
+	auto *chain = static_cast<chain_type *>(MESSAGE_FIELD(task, received_headers));
+
+	if (!chain) {
+		/* This constructor automatically registers dtor in mempool */
+		chain = new chain_type(task);
+		MESSAGE_FIELD(task, received_headers) = (void *) chain;
+	}
+
+	const std::string_view input{data, sz};
+
+	return rspamd::mime::received_header_parse(*chain, task->task_pool, input, hdr);
+}
+
+/* C entry point: forwards to rspamd::mime::received_maybe_fix_task and returns its result */
+bool rspamd_received_maybe_fix_task(struct rspamd_task *task)
+{
+ return rspamd::mime::received_maybe_fix_task(task);
+}
+
+/**
+ * C entry point: export the task's received chain to Lua.
+ *
+ * @param task task whose chain is exported
+ * @param L Lua state that receives the result
+ * @return result of rspamd::mime::received_export_to_lua
+ */
+bool rspamd_received_export_to_lua(struct rspamd_task *task, lua_State *L)
+{
+	auto *chain = static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers));
+
+	return rspamd::mime::received_export_to_lua(chain, L);
+}
+
+/* Tests part */
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+/*
+ * Parser tests: each case pairs a raw Received value with the map expected
+ * from as_map() on the first header of the resulting chain.
+ */
+TEST_SUITE("received")
+{
+ TEST_CASE("parse received")
+ {
+ using namespace std::string_view_literals;
+ using map_type = ankerl::unordered_dense::map<std::string_view, std::string_view>;
+ std::vector<std::pair<std::string_view, map_type>> cases{
+ // Simple received
+ {"from smtp11.mailtrack.pl (smtp11.mailtrack.pl [185.243.30.90])"sv,
+ {{"real_ip", "185.243.30.90"},
+ {"real_hostname", "smtp11.mailtrack.pl"},
+ {"from_hostname", "smtp11.mailtrack.pl"}}},
+ // Real Postfix IPv6 received
+ {"from server.chat-met-vreemden.nl (unknown [IPv6:2a01:7c8:aab6:26d:5054:ff:fed1:1da2])\n"
+ "\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n"
+ "\t(Client did not present a certificate)\n"
+ "\tby mx1.freebsd.org (Postfix) with ESMTPS id CF0171862\n"
+ "\tfor <test@example.com>; Mon, 6 Jul 2015 09:01:20 +0000 (UTC)\n"
+ "\t(envelope-from upwest201diana@outlook.com)"sv,
+ {{"real_ip", "2a01:7c8:aab6:26d:5054:ff:fed1:1da2"},
+ {"from_hostname", "server.chat-met-vreemden.nl"},
+ {"by_hostname", "mx1.freebsd.org"},
+ {"for_mbox", "<test@example.com>"}}},
+ // Exim IPv4 received
+ {"from localhost ([127.0.0.1]:49019 helo=hummus.csx.cam.ac.uk)\n"
+ " by hummus.csx.cam.ac.uk with esmtp (Exim 4.91-pdpfix1)\n"
+ " (envelope-from <exim-dev-bounces@exim.org>)\n"
+ " id 1fZ55o-0006DP-3H\n"
+ " for <xxx@xxx.xxx>; Sat, 30 Jun 2018 02:54:28 +0100"sv,
+ {
+ {"from_hostname", "localhost"},
+ {"real_ip", "127.0.0.1"},
+ {"for_mbox", "<xxx@xxx.xxx>"},
+ {"by_hostname", "hummus.csx.cam.ac.uk"},
+ }},
+ // Exim IPv6 received
+ {"from smtp.spodhuis.org ([2a02:898:31:0:48:4558:736d:7470]:38689\n"
+ " helo=mx.spodhuis.org)\n"
+ " by hummus.csx.cam.ac.uk with esmtpsa (TLSv1.3:TLS_AES_256_GCM_SHA384:256)\n"
+ " (Exim 4.91-pdpfix1+cc) (envelope-from <xxx@exim.org>)\n"
+ " id 1fZ55k-0006CO-9M\n"
+ " for exim-dev@exim.org; Sat, 30 Jun 2018 02:54:24 +0100"sv,
+ {
+ {"from_hostname", "smtp.spodhuis.org"},
+ {"real_ip", "2a02:898:31:0:48:4558:736d:7470"},
+ {"for_mbox", "exim-dev@exim.org"},
+ {"by_hostname", "hummus.csx.cam.ac.uk"},
+ }},
+ // Haraka received
+ {"from aaa.cn ([1.1.1.1]) by localhost.localdomain (Haraka/2.8.18) with "
+ "ESMTPA id 349C9C2B-491A-4925-A687-3EF14038C344.1 envelope-from <huxin@xxx.com> "
+ "(authenticated bits=0); Tue, 03 Jul 2018 14:18:13 +0200"sv,
+ {
+ {"from_hostname", "aaa.cn"},
+ {"real_ip", "1.1.1.1"},
+ {"by_hostname", "localhost.localdomain"},
+ }},
+ // Invalid by
+ {"from [192.83.172.101] (HELLO 148.251.238.35) (148.251.238.35) "
+ "by guovswzqkvry051@sohu.com with gg login "
+ "by AOL 6.0 for Windows US sub 008 SMTP ; Tue, 03 Jul 2018 09:01:47 -0300"sv,
+ {
+ {"from_hostname", "192.83.172.101"},
+ {"real_ip", "192.83.172.101"},
+ }},
+ // Invalid hostinfo
+ {"from example.com ([]) by example.com with ESMTP id 2019091111 ;"
+ " Thu, 26 Sep 2019 11:19:07 +0200"sv,
+ {
+ {"by_hostname", "example.com"},
+ {"from_hostname", "example.com"},
+ {"real_hostname", "example.com"},
+ }},
+ // Different real and announced hostnames + broken crap
+ {"from 171-29.br (1-1-1-1.z.com.br [1.1.1.1]) by x.com.br (Postfix) "
+ "with;ESMTP id 44QShF6xj4z1X for <hey@y.br>; Thu, 21 Mar 2019 23:45:46 -0300 "
+ ": <g @yi.br>"sv,
+ {
+ {"real_ip", "1.1.1.1"},
+ {"from_hostname", "171-29.br"},
+ {"real_hostname", "1-1-1-1.z.com.br"},
+ {"by_hostname", "x.com.br"},
+ }},
+ // Different real and announced ips + no hostname
+ {"from [127.0.0.1] ([127.0.0.2]) by smtp.gmail.com with ESMTPSA id xxxololo"sv,
+ {
+ {"real_ip", "127.0.0.2"},
+ {"from_hostname", "127.0.0.1"},
+ {"by_hostname", "smtp.gmail.com"},
+ }},
+ // Different real and announced hostnames
+ {"from 185.118.166.127 (steven2.zhou01.pserver.ru [185.118.166.127]) "
+ "by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
+ {
+ {"real_ip", "185.118.166.127"},
+ {"from_hostname", "185.118.166.127"},
+ {"real_hostname", "steven2.zhou01.pserver.ru"},
+ {"by_hostname", "mail.832zsu.cn"},
+ }},
+ // \0 in received must be filtered
+ {"from smtp11.mailt\0rack.pl (smtp11.mail\0track.pl [1\085.243.30.90])"sv,
+ {{"real_ip", "185.243.30.90"},
+ {"real_hostname", "smtp11.mailtrack.pl"},
+ {"from_hostname", "smtp11.mailtrack.pl"}}},
+ // No from part
+ {"by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
+ {
+ {"by_hostname", "mail.832zsu.cn"},
+ }},
+ // From part is in the comment
+ {"(from asterisk@localhost)\n"
+ " by pbx.xxx.com (8.14.7/8.14.7/Submit) id 076Go4wD014562;\n"
+ " Thu, 6 Aug 2020 11:50:04 -0500"sv,
+ {
+ {"by_hostname", "pbx.xxx.com"},
+ }},
+ };
+ rspamd_mempool_t *pool = rspamd_mempool_new_default("rcvd test", 0);
+
+ for (auto &&c: cases) {
+ SUBCASE(c.first.data())
+ {
+ rspamd::mime::received_header_chain chain;
+ auto ret = rspamd::mime::received_header_parse(chain, pool,
+ c.first, nullptr);
+ CHECK(ret == true);
+ auto &&rh = chain.get_received(0);
+ CHECK(rh.has_value());
+ auto res = rh.value().get().as_map();
+
+ // Every expected field must be present with the expected value
+ for (const auto &expected: c.second) {
+ CHECK_MESSAGE(res.contains(expected.first), expected.first.data());
+ CHECK(res[expected.first] == expected.second);
+ }
+ // ...and the parser must not report any field that is not expected
+ for (const auto &existing: res) {
+ CHECK_MESSAGE(c.second.contains(existing.first), existing.first.data());
+ CHECK(c.second[existing.first] == existing.second);
+ }
+ }
+ }
+
+ rspamd_mempool_delete(pool);
+ }
+} \ No newline at end of file