diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
commit | 133a45c109da5310add55824db21af5239951f93 (patch) | |
tree | ba6ac4c0a950a0dda56451944315d66409923918 /src/ragel | |
parent | Initial commit. (diff) | |
download | rspamd-upstream.tar.xz rspamd-upstream.zip |
Adding upstream version 3.8.1. [upstream/3.8.1] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/ragel/content_disposition.rl | 36 | ||||
-rw-r--r-- | src/ragel/content_disposition_parser.rl | 126 | ||||
-rw-r--r-- | src/ragel/content_type.rl | 36 | ||||
-rw-r--r-- | src/ragel/rfc2047_parser.rl | 85 | ||||
-rw-r--r-- | src/ragel/smtp_addr_parser.rl | 103 | ||||
-rw-r--r-- | src/ragel/smtp_address.rl | 36 | ||||
-rw-r--r-- | src/ragel/smtp_base.rl | 44 | ||||
-rw-r--r-- | src/ragel/smtp_date.rl | 230 | ||||
-rw-r--r-- | src/ragel/smtp_date_parser.rl | 47 | ||||
-rw-r--r-- | src/ragel/smtp_ip.rl | 36 | ||||
-rw-r--r-- | src/ragel/smtp_ip_parser.rl | 56 |
11 files changed, 835 insertions, 0 deletions
diff --git a/src/ragel/content_disposition.rl b/src/ragel/content_disposition.rl
new file mode 100644
index 0000000..862015e
--- /dev/null
+++ b/src/ragel/content_disposition.rl
@@ -0,0 +1,36 @@
+%%{
+  machine content_disposition;
+  # Grammar for the Content-Disposition header value. The named actions (Quoted_Str_*, Param_*, Disposition_*) are not defined here and must come from the including machine.
+  # https://tools.ietf.org/html/rfc2045#section-5.1
+
+  ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; }; # a nested "(" is handled by calling balanced_ccontent
+  balanced_ccontent := ccontent* ')' @{ fret; }; # the closing ")" returns to the caller
+  comment = "(" (FWS? ccontent)* FWS? ")";
+  CFWS = ((FWS? comment)+ FWS?) | FWS;
+  qcontent = qtextSMTP | quoted_pairSMTP | textUTF8;
+  quoted_string = CFWS?
+                  (DQUOTE
+                    (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
+                  DQUOTE) CFWS?;
+  token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
+  value = (quoted_string | (token -- ('"' | 0x3d | utf8_2c | utf8_3c | utf8_4c))+) >Param_Value_Start %Param_Value_End;
+  attribute = (quoted_string | (token -- ('"' | '='))+) >Param_Name_Start %Param_Name_End;
+  parameter = CFWS? attribute FWS? "=" FWS? value CFWS?;
+
+  ietf_token = token+;
+  custom_x_token = /x/i "-" token+;
+  extension_token = ietf_token | custom_x_token;
+  disposition_type = /inline/i %Disposition_Inline | /attachment/i %Disposition_Attachment
+    | extension_token >Disposition_Start %Disposition_End;
+  disposition_parm = parameter;
+  content_disposition = disposition_type (";" disposition_parm)*;
+
+  prepush {
+    if (top >= st_storage.size) { /* call stack is full: grow it before the push */
+      st_storage.size = (top + 1) * 2;
+      st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
+      g_assert (st_storage.data != NULL);
+      stack = st_storage.data;
+    }
+  }
+}%%
diff --git a/src/ragel/content_disposition_parser.rl b/src/ragel/content_disposition_parser.rl
new file mode 100644
index 0000000..f1b0172
--- /dev/null
+++ b/src/ragel/content_disposition_parser.rl
@@ -0,0 +1,126 @@
+%%{
+  machine content_type_parser; # NOTE(review): name says content_type although this file parses Content-Disposition; looks copy-pasted, confirm before renaming
+  alphtype unsigned char;
+
+  action Disposition_Start {
+  }
+
+  action Disposition_End {
+  }
+
+  action Disposition_Inline {
+    cd->type = RSPAMD_CT_INLINE;
+  }
+
+  action Disposition_Attachment {
+    cd->type = RSPAMD_CT_ATTACHMENT;
+  }
+
+  action Param_Name_Start {
+    qstart = NULL;
+    qend = NULL;
+    pname_start = p;
+    pname_end = NULL;
+  }
+
+  action Param_Name_End {
+    if (qstart) { /* a quoted name was seen: use the span inside the quotes */
+      pname_start = qstart;
+    }
+    if (qend && qend >= qstart) {
+      pname_end = qend;
+    }
+    else if (p >= pname_start) {
+      pname_end = p;
+    }
+    qstart = NULL;
+    qend = NULL;
+  }
+
+
+  action Param_Value_Start {
+    qstart = NULL;
+    qend = NULL;
+
+    if (pname_end) { /* only track a value once a name has been completed */
+      pvalue_start = p;
+      pvalue_end = NULL;
+    }
+  }
+
+
+  action Param_Value_End {
+    if (pname_end) {
+      if (qstart) { /* a quoted value was seen: use the span inside the quotes */
+        pvalue_start = qstart;
+      }
+      if (qend && qend >= qstart) {
+        pvalue_end = qend;
+      }
+      else if (p >= pvalue_start) {
+        pvalue_end = p;
+      }
+      qstart = NULL;
+      qend = NULL;
+
+      if (pvalue_end && pvalue_end > pvalue_start && pname_end > pname_start) { /* empty names or values are not stored */
+        rspamd_content_disposition_add_param (pool, cd, pname_start, pname_end, pvalue_start, pvalue_end);
+      }
+    }
+
+    pname_start = NULL; /* reset all spans so the next parameter starts clean */
+    pname_end = NULL;
+    pvalue_start = NULL;
+    pvalue_end = NULL;
+    qend = NULL;
+    qstart = NULL;
+  }
+
+  action Quoted_Str_Start {
+    qstart = p;
+    qend = NULL;
+  }
+
+  action Quoted_Str_End {
+    if (qstart) {
+      qend = p;
+    }
+  }
+
+  include smtp_base "smtp_base.rl";
+  include content_disposition "content_disposition.rl";
+
+  main := content_disposition;
+
+}%%
+
+#include "smtp_parsers.h"
+#include "content_type.h"
+/* rspamd_content_disposition_parser: zeroes *cd, then runs the machine over data[0..len); actions set cd->type and hand parameter spans to rspamd_content_disposition_add_param (pool, cd, ...). */
+%% write data;
+/* Returns non-zero when cd->attrs is set or cd->type differs from RSPAMD_CT_UNKNOWN, zero otherwise. */
+gboolean
+rspamd_content_disposition_parser (const char *data, size_t len, struct rspamd_content_disposition *cd, rspamd_mempool_t *pool)
+{
+  const unsigned char *p = (const unsigned char *) data, *pe = p + len, *eof, *qstart = NULL, *qend = NULL, /* explicit cast: data is plain char, the machine alphabet is unsigned char */
+    *pname_start = NULL, *pname_end = NULL, *pvalue_start = NULL, *pvalue_end = NULL;
+  int cs, *stack = NULL;
+  gsize top = 0;
+  struct _ragel_st_storage {
+    int *data;
+    gsize size;
+  } st_storage;
+
+  memset (&st_storage, 0, sizeof (st_storage));
+  memset (cd, 0, sizeof (*cd));
+  eof = pe;
+
+  %% write init;
+  %% write exec;
+
+  if (st_storage.data) { /* only allocated by prepush, i.e. when an fcall happened */
+    free (st_storage.data);
+  }
+
+  return cd->attrs != NULL || cd->type != RSPAMD_CT_UNKNOWN;
+}
diff --git a/src/ragel/content_type.rl b/src/ragel/content_type.rl
new file mode 100644
index 0000000..2988920
--- /dev/null
+++ b/src/ragel/content_type.rl
@@ -0,0 +1,36 @@
+%%{
+  machine content_type;
+  include smtp_whitespace "smtp_whitespace.rl";
+  # Grammar for the Content-Type header value. Type_*, Subtype_*, Param_* and Quoted_Str_* actions are not defined here and must come from the including machine.
+  # https://tools.ietf.org/html/rfc2045#section-5.1
+
+  ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; }; # a nested "(" is handled by calling balanced_ccontent
+  balanced_ccontent := ccontent* ')' @{ fret; }; # the closing ")" returns to the caller
+  comment = "(" (FWS? ccontent)* FWS? ")";
+  CFWS = ((FWS? comment)+ FWS?) | FWS;
+  qcontent = qtextSMTP | quoted_pairSMTP;
+  quoted_string = (DQUOTE
+    (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
+    DQUOTE);
+  token = 0x21..0x27 | 0x2a..0x2b | 0x2c..0x2e | 0x30..0x39 | 0x41..0x5a | 0x5e..0x7e;
+  value = (quoted_string | (token)+) >Param_Value_Start %Param_Value_End;
+  attribute = (token+) >Param_Name_Start %Param_Name_End;
+  parameter = CFWS? attribute FWS? "=" FWS? value CFWS?;
+
+  ietf_token = token+;
+  custom_x_token = 'x'i "-" token+;
+  extension_token = ietf_token | custom_x_token;
+  iana_token = token+;
+  main_type = (extension_token) >Type_Start %Type_End;
+  sub_type = (extension_token | iana_token) >Subtype_Start %Subtype_End;
+  content_type = main_type ("/" sub_type)? (((CFWS? ";"+) | CFWS) parameter CFWS?)*;
+
+  prepush {
+    if (top >= st_storage.size) { /* call stack is full: grow it before the push */
+      st_storage.size = (top + 1) * 2;
+      st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
+      g_assert (st_storage.data != NULL);
+      stack = st_storage.data;
+    }
+  }
+}%%
\ No newline at end of file
diff --git a/src/ragel/rfc2047_parser.rl b/src/ragel/rfc2047_parser.rl
new file mode 100644
index 0000000..88e107b
--- /dev/null
+++ b/src/ragel/rfc2047_parser.rl
@@ -0,0 +1,85 @@
+%%{
+  # It actually implements rfc2047 + rfc2231 extension
+  machine rfc2047_parser;
+
+  action Start_Charset {
+    charset_start = p;
+  }
+
+  action End_Charset {
+    if (charset_start && p > charset_start) {
+      charset_end = p;
+    }
+  }
+
+  action End_Encoding {
+    if (p > in) {
+      switch (*(p - 1)) { /* leaving action: p is already past the encoding letter */
+      case 'B':
+      case 'b':
+        encoding = RSPAMD_RFC2047_BASE64;
+        break;
+      default:
+        encoding = RSPAMD_RFC2047_QP;
+        break;
+      }
+    }
+  }
+
+  action Start_Encoded {
+    encoded_start = p;
+  }
+
+  action End_Encoded {
+    if (encoded_start && p > encoded_start) {
+      encoded_end = p;
+    }
+  }
+
+  primary_tag = alpha{1,8};
+  subtag = alpha{1,8};
+  language = primary_tag ( "-" subtag )*;
+  especials = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\"" | "/" | "[" | "]" | "?" | "." | "=" | "*";
+  token = (graph - especials)+;
+  charset = token;
+  encoding = "Q" | "q" | "B" | "b";
+  encoded_text = (print+ -- ("?=")); # printable run that never contains "?="
+  encoded_word = "=?" charset >Start_Charset %End_Charset
+    ("*" language)? "?"
+    encoding %End_Encoding "?"
+    encoded_text >Start_Encoded %End_Encoded
+    "?="?; # the closing "?=" is optional
+  main := encoded_word;
+}%%
+
+#include "smtp_parsers.h"
+#include "mime_headers.h"
+/* rspamd_rfc2047_parser: scans in[0..len) as a single encoded-word of the form "=?charset[*language]?encoding?text?=". */
+%% write data;
+/* When an encoded-text span was captured, stores the encoding and the charset/encoded spans (pointers into `in`, nothing is copied) and returns TRUE; otherwise returns FALSE and writes none of the outputs. */
+gboolean
+rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding,
+  const gchar **charset, gsize *charset_len,
+  const gchar **encoded, gsize *encoded_len)
+{
+  const char *p = in, *pe = in + len,
+    *encoded_start = NULL, *encoded_end = NULL,
+    *charset_start = NULL, *charset_end = NULL,
+    *eof = in + len;
+  gint encoding = RSPAMD_RFC2047_QP, cs = 0;
+
+  %% write init;
+  %% write exec;
+
+  if (encoded_end) { /* only set by End_Encoded, which the grammar reaches after charset and encoding */
+    *pencoding = encoding;
+    *charset = charset_start;
+    *charset_len = charset_end - charset_start;
+    *encoded = encoded_start;
+    *encoded_len = encoded_end - encoded_start;
+
+    return TRUE;
+  }
+
+  return FALSE;
+}
diff --git a/src/ragel/smtp_addr_parser.rl b/src/ragel/smtp_addr_parser.rl
new file mode 100644
index 0000000..b5b4863
--- /dev/null
+++ b/src/ragel/smtp_addr_parser.rl
@@ -0,0 +1,103 @@
+%%{
+
+  machine smtp_addr_parser;
+
+  action IP6_start {} # the four IP hooks are required by smtp_ip.rl; nothing is recorded for them here
+  action IP6_end {}
+  action IP4_start {}
+  action IP4_end {}
+
+  action User_start {
+    addr->user = p;
+  }
+
+  action User_end {
+    if (addr->user) {
+      addr->user_len = p - addr->user;
+    }
+  }
+
+  action Domain_start {
+    addr->domain = p;
+  }
+
+  action Domain_end {
+    if (addr->domain) {
+      addr->domain_len = p - addr->domain;
+    }
+  }
+
+  action Domain_addr_start {
+    addr->domain = p;
+    addr->flags |= RSPAMD_EMAIL_ADDR_IP;
+  }
+
+  action Domain_addr_end {
+    if (addr->domain) {
+      addr->domain_len = p - addr->domain;
+    }
+  }
+
+  action User_has_backslash {
+    addr->flags |= RSPAMD_EMAIL_ADDR_HAS_BACKSLASH;
+  }
+
+  action Quoted_addr {
+    addr->flags |= RSPAMD_EMAIL_ADDR_QUOTED;
+  }
+
+  action Empty_addr {
+    addr->flags |= RSPAMD_EMAIL_ADDR_EMPTY|RSPAMD_EMAIL_ADDR_VALID;
+    addr->addr = ""; /* "<>": every part points at the same empty string literal */
+    addr->user = addr->addr;
+    addr->domain = addr->addr;
+  }
+
+  action Valid_addr {
+    if (addr->addr_len > 0) { /* addr_len is only set by Addr_end */
+      addr->flags |= RSPAMD_EMAIL_ADDR_VALID;
+    }
+  }
+
+  action Addr_has_angle {
+    addr->flags |= RSPAMD_EMAIL_ADDR_BRACED;
+  }
+
+  action Addr_start {
+    addr->addr = p;
+  }
+
+  action Addr_end {
+    if (addr->addr) {
+      addr->addr_len = p - addr->addr;
+    }
+  }
+
+  include smtp_base "smtp_base.rl";
+  include smtp_ip "smtp_ip.rl";
+  include smtp_address "smtp_address.rl";
+
+  main := SMTPAddr;
+}%%
+
+#include "smtp_parsers.h"
+/* rspamd_smtp_addr_parse: zeroes *addr, stores data/len as raw/raw_len and runs the machine; the actions fill the user/domain/addr spans (pointers into data) and the flags. */
+%% write data;
+/* Returns the final machine state cs unchanged; this function itself does not compare it with the error or final states. addr must not be NULL (asserted). */
+int
+rspamd_smtp_addr_parse (const char *data, size_t len, struct rspamd_email_address *addr)
+{
+  const char *p = data, *pe = data + len, *eof;
+  int cs;
+
+  g_assert (addr != NULL);
+  memset (addr, 0, sizeof (*addr));
+  addr->raw = data;
+  addr->raw_len = len;
+  eof = pe;
+
+  %% write init;
+  %% write exec;
+
+  return cs;
+}
diff --git a/src/ragel/smtp_address.rl b/src/ragel/smtp_address.rl
new file mode 100644
index 0000000..0caf1a6
--- /dev/null
+++ b/src/ragel/smtp_address.rl
@@ -0,0 +1,36 @@
+%%{
+  machine smtp_address;
+
+  # SMTP address spec
+  # Source: https://tools.ietf.org/html/rfc5321#section-4.1.2
+  # Dependencies: smtp_base + smtp_ip
+  # Required actions:
+  #  - User_has_backslash
+  #  - User_start, User_end
+  #  - Quoted_addr
+  #  - Domain_start
+  #  - Domain_end
+  #  - Addr_start, Addr_end
+  #  - Addr_has_angle
+  #  - Valid_addr
+  #  - Empty_addr
+  #  + from deps:
+  #    - IP4_start
+  #    - IP4_end
+  #    - IP6_start
+  #    - IP6_end
+  #    - Domain_addr_start
+  #    - Domain_addr_end
+
+  # SMTP address spec
+  # Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2
+
+  QcontentSMTP = qtextSMTP | quoted_pairSMTP %User_has_backslash;
+  Quoted_string = ( DQUOTE QcontentSMTP* >User_start %User_end DQUOTE ) %Quoted_addr;
+  Local_part = Dot_string >User_start %User_end | Quoted_string;
+  Mailbox = Local_part "@" (address_literal | Domain >Domain_start %Domain_end);
+  UnangledPath = ( Adl ":" )? Mailbox >Addr_start %Addr_end "."?; # a single trailing "." is tolerated and lies outside the Addr span
+  AngledPath = "<" space* UnangledPath space* ">" %Addr_has_angle;
+  Path = AngledPath | UnangledPath;
+  SMTPAddr = space* (Path | "<>" %Empty_addr ) %Valid_addr space*;
+}%%
diff --git a/src/ragel/smtp_base.rl b/src/ragel/smtp_base.rl
new file mode 100644
index 0000000..cb4f066
--- /dev/null
+++ b/src/ragel/smtp_base.rl
@@ -0,0 +1,44 @@
+%%{
+  machine smtp_base;
+
+  # Base SMTP definitions
+  # Dependencies: none
+  # Required actions: none
+
+  WSP = " ";
+  CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n"); # besides CR LF: CR plus any non-LF byte, or any non-CR byte plus LF
+  DQUOTE = '"';
+
+  # Printable US-ASCII characters not including specials
+  atext = alpha | digit | "!" | "#" | "$" | "%" | "&" |
+          "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" |
+          "-" | "`" | "{" | "|" | "}" | "~";
+  # Printable US-ASCII characters not including "[", "]", or "\"
+  dtext = 33..90 | 94..126;
+  # Printable US-ASCII characters not including "(", ")", or "\"
+  ctext = 33..39 | 42..91 | 93..126;
+
+  dcontent = 33..90 | 94..126;
+  Let_dig = alpha | digit;
+  Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig; # "_" is accepted here in addition to letters, digits and "-"
+
+  quoted_pairSMTP = "\\" 32..126;
+  qtextSMTP = 32..33 | 35..91 | 93..126;
+  utf8_cont = 0x80..0xbf;
+  utf8_2c = 0xc0..0xdf utf8_cont; # lead/continuation byte shape only; these ranges also admit 0xc0, 0xc1 and 0xf5..0xf7 leads
+  utf8_3c = 0xe0..0xef utf8_cont utf8_cont;
+  utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont;
+  textUTF8 = qtextSMTP | utf8_2c | utf8_3c | utf8_4c;
+  Atom = atext+;
+  Dot_string = Atom ("." Atom)*;
+  dot_atom_text = atext+ ("." atext+)*;
+  #FWS = ((WSP* CRLF)? WSP+);
+  FWS = WSP+; # We work with unfolded headers, so we can simplify machine
+
+  sub_domain = Let_dig Ldh_str?;
+  Domain = sub_domain ("." sub_domain)*;
+  Atdomain = "@" Domain;
+  Adl = Atdomain ( "," Atdomain )*;
+
+  Standardized_tag = Ldh_str;
+}%%
\ No newline at end of file
diff --git a/src/ragel/smtp_date.rl b/src/ragel/smtp_date.rl
new file mode 100644
index 0000000..125ae8a
--- /dev/null
+++ b/src/ragel/smtp_date.rl
@@ -0,0 +1,230 @@
+%%{
+  machine smtp_date;
+
+  # SMTP date spec
+  # Obtained from: http://tools.ietf.org/html/rfc5322#section-3.3
+  # The actions below use the host variables tmp, tm, tz and the st_storage/stack/top call stack; all of them must be declared by the including parser.
+
+  action Day_Start {
+    tmp = p;
+  }
+  action Day_End {
+    if (p > tmp) {
+      gulong n;
+      if (rspamd_strtoul (tmp, p - tmp, &n)) {
+        if (n > 0 && n <= 31) {
+          tm.tm_mday = n;
+        }
+        else {
+          fbreak; /* out-of-range day: stop the machine */
+        }
+      }
+    }
+  }
+  action Month_End {
+
+  }
+  action Year_Start {
+    tmp = p;
+  }
+  action Year_End {
+    if (p > tmp) {
+      gulong n;
+      if (rspamd_strtoul (tmp, p - tmp, &n)) {
+        if (n < 1000) {
+          if (n < 50) { /* 0..49 mean 2000..2049; tm_year counts years since 1900 */
+            tm.tm_year = n - 1900 + 2000;
+          }
+          else {
+            tm.tm_year = n; /* 50..999 are taken as already relative to 1900 */
+          }
+        }
+        else {
+          tm.tm_year = n - 1900; /* full year */
+        }
+      }
+    }
+  }
+  action Hour_Start {
+    tmp = p;
+  }
+  action Hour_End {
+    if (p > tmp) {
+      gulong n;
+      if (rspamd_strtoul (tmp, p - tmp, &n)) {
+        if (n < 24) {
+          tm.tm_hour = n;
+        }
+        else {
+          fbreak;
+        }
+      }
+    }
+    else {
+      fbreak;
+    }
+  }
+  action Minute_Start {
+    tmp = p;
+  }
+  action Minute_End {
+    if (p > tmp) {
+      gulong n;
+      if (rspamd_strtoul (tmp, p - tmp, &n)) {
+        if (n < 60) {
+          tm.tm_min = n;
+        }
+        else {
+          fbreak;
+        }
+      }
+    }
+    else {
+      fbreak;
+    }
+  }
+  action Second_Start {
+    tmp = p;
+  }
+  action Second_End {
+    if (p > tmp) {
+      gulong n;
+      if (rspamd_strtoul (tmp, p - tmp, &n)) {
+        if (n <= 60) { /* Leap second */
+          tm.tm_sec = n;
+        }
+        else {
+          fbreak;
+        }
+      }
+    }
+    else {
+      fbreak;
+    }
+  }
+  action TZ_Sign {
+    tmp = p; /* leaving action: p is the first offset digit, the sign is at tmp - 1 */
+  }
+  action TZ_Offset_Start {
+
+  }
+  action TZ_Offset_End {
+    if (p > tmp) {
+      rspamd_strtoul (tmp, p - tmp, (gulong *)&tz); /* the four hhmm digits are stored as one number; NOTE(review): return value is ignored */
+
+      if (*(tmp - 1) == '-') {
+        tz = -(tz);
+      }
+    }
+  }
+  action Obs_Zone_End {
+  }
+  action DT_End {
+  }
+
+  # Specific actions
+  # Months
+  action Month_Jan {
+    tm.tm_mon = 0;
+  }
+  action Month_Feb {
+    tm.tm_mon = 1;
+  }
+  action Month_Mar {
+    tm.tm_mon = 2;
+  }
+  action Month_Apr {
+    tm.tm_mon = 3;
+  }
+  action Month_May {
+    tm.tm_mon = 4;
+  }
+  action Month_Jun {
+    tm.tm_mon = 5;
+  }
+  action Month_Jul {
+    tm.tm_mon = 6;
+  }
+  action Month_Aug {
+    tm.tm_mon = 7;
+  }
+  action Month_Sep {
+    tm.tm_mon = 8;
+  }
+  action Month_Oct {
+    tm.tm_mon = 9;
+  }
+  action Month_Nov {
+    tm.tm_mon = 10;
+  }
+  action Month_Dec {
+    tm.tm_mon = 11;
+  }
+  # Obsoleted timezones
+  action TZ_UT {
+    tz = 0;
+  }
+  action TZ_GMT {
+    tz = 0;
+  }
+  action TZ_EST {
+    tz = -500; /* same signed hhmm form that TZ_Offset_End produces */
+  }
+  action TZ_EDT {
+    tz = -400;
+  }
+  action TZ_CST {
+    tz = -600;
+  }
+  action TZ_CDT {
+    tz = -500;
+  }
+  action TZ_MST {
+    tz = -700;
+  }
+  action TZ_MDT {
+    tz = -600;
+  }
+  action TZ_PST {
+    tz = -800;
+  }
+  action TZ_PDT {
+    tz = -700;
+  }
+  prepush {
+    if (top >= st_storage.size) { /* call stack is full: grow it before the push */
+      st_storage.size = (top + 1) * 2;
+      st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int));
+      g_assert (st_storage.data != NULL);
+      stack = st_storage.data;
+    }
+  }
+  ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; }; # a nested "(" is handled by calling balanced_ccontent
+  balanced_ccontent := ccontent* ')' @{ fret; }; # the closing ")" returns to the caller
+  comment = "(" (FWS? ccontent)* FWS? ")";
+  CFWS = ((FWS? comment)+ FWS?) | FWS;
+  digit_2 = digit{2};
+  digit_4 = digit{4};
+  day_name = "Mon" | "Tue" | "Wed" | "Thu" |
+             "Fri" | "Sat" | "Sun";
+  day_of_week = FWS? day_name;
+  day = FWS? digit{1,2} >Day_Start %Day_End FWS;
+  month = "Jan" %Month_Jan | "Feb" %Month_Feb | "Mar" %Month_Mar | "Apr" %Month_Apr |
+          "May" %Month_May | "Jun" %Month_Jun | "Jul" %Month_Jul | "Aug" %Month_Aug |
+          "Sep" %Month_Sep | "Oct" %Month_Oct | "Nov" %Month_Nov | "Dec" %Month_Dec;
+  year = FWS digit{2,4} >Year_Start %Year_End FWS;
+  date = day month %Month_End year;
+  hour = digit_2;
+  minute = digit_2;
+  second = digit_2;
+  time_of_day = hour >Hour_Start %Hour_End ":" minute >Minute_Start %Minute_End (":" second >Second_Start %Second_End )?;
+  zone = ("+" | "-") %TZ_Sign digit_4 >TZ_Offset_Start %TZ_Offset_End;
+  obs_zone = "UT" %TZ_UT | "GMT" %TZ_GMT |
+             "EST" %TZ_EST | "EDT" %TZ_EDT |
+             "CST" %TZ_CST | "CDT" %TZ_CDT |
+             "MST" %TZ_MST | "MDT" %TZ_MDT |
+             "PST" %TZ_PST | "PDT" %TZ_PDT |
+             [a-iA-I] | [k-zK-Z]; # single-letter zones carry no action, so tz keeps its previous value
+  time = time_of_day %DT_End FWS (zone | obs_zone %Obs_Zone_End) FWS*;
+  date_time = (day_of_week ",")? date time CFWS?;
+}%%
diff --git a/src/ragel/smtp_date_parser.rl b/src/ragel/smtp_date_parser.rl
new file mode 100644
index 0000000..8d99ea9
--- /dev/null
+++ b/src/ragel/smtp_date_parser.rl
@@ -0,0 +1,47 @@
+%%{
+
+  machine smtp_date_parser;
+  alphtype unsigned char;
+  include smtp_base "smtp_base.rl";
+  include smtp_date "smtp_date.rl";
+
+  main := date_time;
+}%%
+
+#include "smtp_parsers.h"
+#include "util.h"
+/* rspamd_parse_smtp_date: parses data[0..len) with the date_time grammar and returns rspamd_tm_to_time (&tm, tz) for the fields collected by the actions. */
+%% write data;
+/* If the machine does not end in a final state, returns (guint64)(-1) and reports through err (domain "smtp_date", code = machine state). */
+guint64
+rspamd_parse_smtp_date (const unsigned char *data, size_t len, GError **err)
+{
+  const unsigned char *p = data, *pe = data + len, *eof = data + len, *tmp = data;
+  struct tm tm;
+  glong tz = 0;
+  gint cs = 0, *stack = NULL;
+  gsize top = 0;
+
+  memset (&tm, 0, sizeof (tm));
+
+  struct _ragel_st_storage {
+    int *data;
+    gsize size;
+  } st_storage;
+  memset (&st_storage, 0, sizeof (st_storage));
+
+  %% write init;
+  %% write exec;
+
+  if (st_storage.data) { /* only allocated by prepush, i.e. when an fcall happened */
+    free (st_storage.data);
+  }
+  /* p equals pe when the input ends before a final state is reached, so *p may only be read while p < pe */
+  if ( cs < %%{ write first_final; }%% ) {
+    g_set_error (err, g_quark_from_static_string ("smtp_date"), cs, "invalid date at offset %d (%c), state %d",
+      (int)(p - data), (p < pe && *p > 0 && *p < 128) ? *p : '?', cs);
+    return (guint64)(-1);
+  }
+
+  return rspamd_tm_to_time (&tm, tz);
+}
\ No newline at end of file
diff --git a/src/ragel/smtp_ip.rl b/src/ragel/smtp_ip.rl
new file mode 100644
index 0000000..ed10c95
--- /dev/null
+++ b/src/ragel/smtp_ip.rl
@@ -0,0 +1,36 @@
+%%{
+  machine smtp_ip;
+
+  # Parses IPv4/IPv6 address
+  # Source: https://tools.ietf.org/html/rfc5321#section-4.1.3
+  # Dependencies: smtp_base (Standardized_tag and dcontent are used below but not defined in this file)
+  # Required actions:
+  #  - IP4_start
+  #  - IP4_end
+  #  - IP6_start
+  #  - IP6_end
+  #  - Domain_addr_start
+  #  - Domain_addr_end
+
+  Snum = digit{1,3}; # one to three digits; the numeric value is not limited to 255 here
+  IPv4_addr = (Snum ("." Snum){3});
+  IPv4_address_literal = IPv4_addr >IP4_start %IP4_end;
+  IPv6_hex = xdigit{1,4};
+  IPv6_full = IPv6_hex (":" IPv6_hex){7};
+  IPv6_comp = (IPv6_hex (":" IPv6_hex){0,5})? "::"
+              (IPv6_hex (":" IPv6_hex){0,5})?;
+  IPv6v4_full = IPv6_hex (":" IPv6_hex){5} ":" IPv4_address_literal; # the IPv4 tail reuses IPv4_address_literal, so IP4_start/IP4_end also fire inside an IPv6 literal
+  IPv6v4_comp = (IPv6_hex (":" IPv6_hex){0,3})? "::"
+                (IPv6_hex (":" IPv6_hex){0,3} ":")?
+                IPv4_address_literal;
+  IPv6_simple = IPv6_full | IPv6_comp;
+  IPv6_addr = IPv6_simple | IPv6v4_full | IPv6v4_comp;
+  IPv6_address_literal = "IPv6:" %IP6_start IPv6_addr %IP6_end; # IP6_start is a leaving action of the "IPv6:" tag, so it runs on the first address character
+
+  General_address_literal = Standardized_tag ":" dcontent+;
+  address_literal = "[" ( IPv4_address_literal |
+                    IPv6_address_literal |
+                    General_address_literal ) >Domain_addr_start %Domain_addr_end "]"; # the Domain_addr_* span excludes the brackets
+  non_conformant_address_literal = IPv4_address_literal >Domain_addr_start %Domain_addr_end; # bare IPv4 without brackets
+
+}%%
\ No newline at end of file
diff --git a/src/ragel/smtp_ip_parser.rl b/src/ragel/smtp_ip_parser.rl
new file mode 100644
index 0000000..617f731
--- /dev/null
+++ b/src/ragel/smtp_ip_parser.rl
@@ -0,0 +1,56 @@
+%%{
+
+  machine smtp_ip_parser;
+
+  action IP6_start {
+    span_begin = p; /* address text starts right after the "IPv6:" tag */
+    inside_v6 = TRUE;
+  }
+  action IP6_end {
+    span_end = p;
+    inside_v6 = FALSE;
+  }
+  action IP4_start {
+    if (inside_v6 == FALSE) { /* an IPv4 tail of an IPv6 literal must not move the span */
+      span_begin = p;
+    }
+  }
+  action IP4_end {
+    if (inside_v6 == FALSE) {
+      span_end = p;
+    }
+  }
+
+  action Domain_addr_start {} # required by smtp_ip.rl, unused by this parser
+  action Domain_addr_end {}
+
+  include smtp_base "smtp_base.rl";
+  include smtp_ip "smtp_ip.rl";
+
+  main := address_literal | non_conformant_address_literal;
+}%%
+
+#include "smtp_parsers.h"
+#include "util.h"
+#include "addr.h"
+/* rspamd_parse_smtp_ip: runs the machine over data[0..len) ("[...]" address literal or bare IPv4) and remembers the span of the IP text seen by the IP4/IP6 actions. */
+%% write data;
+/* Returns the result of rspamd_parse_inet_address_pool for that span (pool is forwarded to it), or NULL when no non-empty span was captured. */
+rspamd_inet_addr_t *
+rspamd_parse_smtp_ip (const char *data, size_t len, rspamd_mempool_t *pool)
+{
+  const char *p = data, *pe = data + len, *eof = pe;
+  const char *span_begin = NULL, *span_end = NULL;
+  gboolean inside_v6 = FALSE;
+  gint cs = 0;
+
+  %% write init;
+  %% write exec;
+
+  if (!span_begin || !span_end || span_end <= span_begin) {
+    return NULL;
+  }
+
+  return rspamd_parse_inet_address_pool (span_begin, span_end - span_begin, pool,
+    RSPAMD_INET_ADDRESS_PARSE_NO_UNIX|RSPAMD_INET_ADDRESS_PARSE_REMOTE);
+}
\ No newline at end of file |