diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
commit | 133a45c109da5310add55824db21af5239951f93 (patch) | |
tree | ba6ac4c0a950a0dda56451944315d66409923918 /src/libmime/archives.c | |
parent | Initial commit. (diff) | |
download | rspamd-upstream.tar.xz rspamd-upstream.zip |
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libmime/archives.c')
-rw-r--r-- | src/libmime/archives.c | 2057 |
1 files changed, 2057 insertions, 0 deletions
diff --git a/src/libmime/archives.c b/src/libmime/archives.c new file mode 100644 index 0000000..ea0ea55 --- /dev/null +++ b/src/libmime/archives.c @@ -0,0 +1,2057 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "config.h" +#include "message.h" +#include "task.h" +#include "archives.h" +#include "libmime/mime_encoding.h" +#include <unicode/uchar.h> +#include <unicode/utf8.h> +#include <unicode/utf16.h> +#include <unicode/ucnv.h> + +#define msg_debug_archive(...) rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_archive_log_id, "archive", task->task_pool->tag.uid, \ + G_STRFUNC, \ + __VA_ARGS__) + +INIT_LOG_MODULE(archive) + +static void +rspamd_archive_dtor(gpointer p) +{ + struct rspamd_archive *arch = p; + struct rspamd_archive_file *f; + guint i; + + for (i = 0; i < arch->files->len; i++) { + f = g_ptr_array_index(arch->files, i); + + if (f->fname) { + g_string_free(f->fname, TRUE); + } + + g_free(f); + } + + g_ptr_array_free(arch->files, TRUE); +} + +static bool +rspamd_archive_file_try_utf(struct rspamd_task *task, + struct rspamd_archive *arch, + struct rspamd_archive_file *fentry, + const gchar *in, gsize inlen) +{ + const gchar *charset = NULL, *p, *end; + GString *res; + + charset = rspamd_mime_charset_find_by_content(in, inlen, TRUE); + + if (charset) { + UChar *tmp; + UErrorCode uc_err = U_ZERO_ERROR; + gint32 r, clen, dlen; + struct rspamd_charset_converter *conv; + UConverter *utf8_converter; + + conv = rspamd_mime_get_converter_cached(charset, task->task_pool, + TRUE, &uc_err); + utf8_converter = rspamd_get_utf8_converter(); + + if (conv == NULL) { + msg_info_task("cannot open converter for %s: %s", + charset, u_errorName(uc_err)); + fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED; + fentry->fname = g_string_new_len(in, inlen); + + return false; + } + + tmp = g_malloc(sizeof(*tmp) * (inlen + 1)); + r = rspamd_converter_to_uchars(conv, tmp, inlen + 1, + in, inlen, &uc_err); + if (!U_SUCCESS(uc_err)) { + msg_info_task("cannot convert data to unicode from %s: %s", + charset, u_errorName(uc_err)); + g_free(tmp); + + fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED; + fentry->fname = g_string_new_len(in, inlen); + + return NULL; + } + + int i = 0; + + while (i < r) { + UChar32 uc; + + U16_NEXT(tmp, i, r, uc); + + if (IS_ZERO_WIDTH_SPACE(uc) || u_iscntrl(uc)) { + msg_info_task("control character in archive file name found: 0x%02xd " + "(filename=%T)", + uc, arch->archive_name); + fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED; + break; + } + } + + clen = ucnv_getMaxCharSize(utf8_converter); + dlen = UCNV_GET_MAX_BYTES_FOR_STRING(r, clen); + res = g_string_sized_new(dlen); + r = ucnv_fromUChars(utf8_converter, res->str, dlen, tmp, r, &uc_err); + + if (!U_SUCCESS(uc_err)) { + msg_info_task("cannot convert data from unicode from %s: %s", + charset, u_errorName(uc_err)); + g_free(tmp); + g_string_free(res, TRUE); + fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED; + fentry->fname = g_string_new_len(in, inlen); + + return NULL; + } + + g_free(tmp); + res->len = r; + + msg_debug_archive("converted from %s to UTF-8 inlen: %z, outlen: %d", + charset, inlen, r); + fentry->fname = res; + } + else { + /* Convert unsafe characters to '?' */ + res = g_string_sized_new(inlen); + p = in; + end = in + inlen; + + while (p < end) { + if (g_ascii_isgraph(*p)) { + g_string_append_c(res, *p); + } + else { + g_string_append_c(res, '?'); + + if (*p < 0x7f && (g_ascii_iscntrl(*p) || *p == '\0')) { + if (!(fentry->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED)) { + msg_info_task("suspicious character in archive file name found: 0x%02xd " + "(filename=%T)", + (int) *p, arch->archive_name); + fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED; + } + } + } + + p++; + } + fentry->fname = res; + } + + return true; +} + +static void +rspamd_archive_process_zip(struct rspamd_task *task, + struct rspamd_mime_part *part) +{ + const guchar *p, *start, *end, *eocd = NULL, *cd; + const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46; + const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02}; + const guint max_processed = 1024; + guint32 cd_offset, cd_size, comp_size, uncomp_size, processed = 0; + guint16 extra_len, fname_len, comment_len; + struct rspamd_archive *arch; + struct rspamd_archive_file *f = NULL; + + /* Zip files have interesting data at the end of archive */ + p = part->parsed_data.begin + part->parsed_data.len - 1; + start = part->parsed_data.begin; + end = p; + + /* Search for EOCD: + * 22 bytes is a typical size of eocd without a comment and + * end points one byte after the last character + */ + p -= 21; + + while (p > start + sizeof(guint32)) { + guint32 t; + + if (processed > max_processed) { + break; + } + + /* XXX: not an efficient approach */ + memcpy(&t, p, sizeof(t)); + + if (GUINT32_FROM_LE(t) == eocd_magic) { + eocd = p; + break; + } + + p--; + processed++; + } + + + if (eocd == NULL) { + /* Not a zip file */ + msg_info_task("zip archive is invalid (no EOCD)"); + + return; + } + + if (end - eocd < 21) { + msg_info_task("zip archive is invalid (short EOCD)"); + + return; + } + + + memcpy(&cd_size, eocd + 12, sizeof(cd_size)); + cd_size = GUINT32_FROM_LE(cd_size); + memcpy(&cd_offset, eocd + 16, sizeof(cd_offset)); + cd_offset = GUINT32_FROM_LE(cd_offset); + + /* We need to check sanity as well */ + if (cd_offset + cd_size > (guint) (eocd - start)) { + msg_info_task("zip archive is invalid (bad size/offset for CD)"); + + return; + } + + cd = start + cd_offset; + + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = g_ptr_array_new(); + arch->type = RSPAMD_ARCHIVE_ZIP; + if (part->cd) { + arch->archive_name = &part->cd->filename; + } + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); + + while (cd < start + cd_offset + cd_size) { + guint16 flags; + + /* Read central directory record */ + if (eocd - cd < cd_basic_len || + memcmp(cd, cd_magic, sizeof(cd_magic)) != 0) { + msg_info_task("zip archive is invalid (bad cd record)"); + + return; + } + + memcpy(&flags, cd + 8, sizeof(guint16)); + flags = GUINT16_FROM_LE(flags); + memcpy(&comp_size, cd + 20, sizeof(guint32)); + comp_size = GUINT32_FROM_LE(comp_size); + memcpy(&uncomp_size, cd + 24, sizeof(guint32)); + uncomp_size = GUINT32_FROM_LE(uncomp_size); + memcpy(&fname_len, cd + 28, sizeof(fname_len)); + fname_len = GUINT16_FROM_LE(fname_len); + memcpy(&extra_len, cd + 30, sizeof(extra_len)); + extra_len = GUINT16_FROM_LE(extra_len); + memcpy(&comment_len, cd + 32, sizeof(comment_len)); + comment_len = GUINT16_FROM_LE(comment_len); + + if (cd + fname_len + comment_len + extra_len + cd_basic_len > eocd) { + msg_info_task("zip archive is invalid (too large cd record)"); + + return; + } + + f = g_malloc0(sizeof(*f)); + rspamd_archive_file_try_utf(task, arch, f, cd + cd_basic_len, fname_len); + + f->compressed_size = comp_size; + f->uncompressed_size = uncomp_size; + + if (flags & 0x41u) { + f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED; + } + + if (f->fname) { + if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) { + arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES; + } + + g_ptr_array_add(arch->files, f); + msg_debug_archive("found file in zip archive: %v", f->fname); + } + else { + g_free(f); + + return; + } + + /* Process extra fields */ + const guchar *extra = cd + fname_len + cd_basic_len; + p = extra; + + while (p + sizeof(guint16) * 2 < extra + extra_len) { + guint16 hid, hlen; + + memcpy(&hid, p, sizeof(guint16)); + hid = GUINT16_FROM_LE(hid); + memcpy(&hlen, p + sizeof(guint16), sizeof(guint16)); + hlen = GUINT16_FROM_LE(hlen); + + if (hid == 0x0017) { + f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED; + } + + p += hlen + sizeof(guint16) * 2; + } + + cd += fname_len + comment_len + extra_len + cd_basic_len; + } + + part->part_type = RSPAMD_MIME_PART_ARCHIVE; + part->specific.arch = arch; + + arch->size = part->parsed_data.len; +} + +static inline gint +rspamd_archive_rar_read_vint(const guchar *start, gsize remain, guint64 *res) +{ + /* + * From http://www.rarlab.com/technote.htm: + * Variable length integer. Can include one or more bytes, where + * lower 7 bits of every byte contain integer data and highest bit + * in every byte is the continuation flag. + * If highest bit is 0, this is the last byte in sequence. + * So first byte contains 7 least significant bits of integer and + * continuation flag. Second byte, if present, contains next 7 bits and so on. + */ + guint64 t = 0; + guint shift = 0; + const guchar *p = start; + + while (remain > 0 && shift <= 57) { + if (*p & 0x80) { + t |= ((guint64) (*p & 0x7f)) << shift; + } + else { + t |= ((guint64) (*p & 0x7f)) << shift; + p++; + break; + } + + shift += 7; + p++; + remain--; + } + + if (remain == 0 || shift > 64) { + return -1; + } + + *res = GUINT64_FROM_LE(t); + + return p - start; +} + +#define RAR_SKIP_BYTES(n) \ + do { \ + if ((n) <= 0) { \ + msg_debug_archive("rar archive is invalid (bad skip value)"); \ + return; \ + } \ + if ((gsize) (end - p) < (n)) { \ + msg_debug_archive("rar archive is invalid (truncated)"); \ + return; \ + } \ + p += (n); \ + } while (0) + +#define RAR_READ_VINT() \ + do { \ + r = rspamd_archive_rar_read_vint(p, end - p, &vint); \ + if (r == -1) { \ + msg_debug_archive("rar archive is invalid (bad vint)"); \ + return; \ + } \ + else if (r == 0) { \ + msg_debug_archive("rar archive is invalid (BAD vint offset)"); \ + return; \ + } \ + } while (0) + +#define RAR_READ_VINT_SKIP() \ + do { \ + r = rspamd_archive_rar_read_vint(p, end - p, &vint); \ + if (r == -1) { \ + msg_debug_archive("rar archive is invalid (bad vint)"); \ + return; \ + } \ + p += r; \ + } while (0) + +#define RAR_READ_UINT16(n) \ + do { \ + if (end - p < (glong) sizeof(guint16)) { \ + msg_debug_archive("rar archive is invalid (bad int16)"); \ + return; \ + } \ + n = p[0] + (p[1] << 8); \ + p += sizeof(guint16); \ + } while (0) + +#define RAR_READ_UINT32(n) \ + do { \ + if (end - p < (glong) sizeof(guint32)) { \ + msg_debug_archive("rar archive is invalid (bad int32)"); \ + return; \ + } \ + n = (guint) p[0] + ((guint) p[1] << 8) + ((guint) p[2] << 16) + ((guint) p[3] << 24); \ + p += sizeof(guint32); \ + } while (0) + +static void +rspamd_archive_process_rar_v4(struct rspamd_task *task, const guchar *start, + const guchar *end, struct rspamd_mime_part *part) +{ + const guchar *p = start, *start_section; + guint8 type; + guint flags; + guint64 sz, comp_sz = 0, uncomp_sz = 0; + struct rspamd_archive *arch; + struct rspamd_archive_file *f; + + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = g_ptr_array_new(); + arch->type = RSPAMD_ARCHIVE_RAR; + if (part->cd) { + arch->archive_name = &part->cd->filename; + } + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); + + while (p < end) { + /* Crc16 */ + start_section = p; + RAR_SKIP_BYTES(sizeof(guint16)); + type = *p; + p++; + RAR_READ_UINT16(flags); + + if (type == 0x73) { + /* Main header, check for encryption */ + if (flags & 0x80) { + arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED; + goto end; + } + } + + RAR_READ_UINT16(sz); + + if (flags & 0x8000) { + /* We also need to read ADD_SIZE element */ + guint32 tmp; + + RAR_READ_UINT32(tmp); + sz += tmp; + /* This is also used as PACK_SIZE */ + comp_sz = tmp; + } + + if (sz == 0) { + /* Zero sized block - error */ + msg_debug_archive("rar archive is invalid (zero size block)"); + + return; + } + + if (type == 0x74) { + guint fname_len; + + /* File header */ + /* Uncompressed size */ + RAR_READ_UINT32(uncomp_sz); + /* Skip to NAME_SIZE element */ + RAR_SKIP_BYTES(11); + RAR_READ_UINT16(fname_len); + + if (fname_len == 0 || fname_len > (gsize) (end - p)) { + msg_debug_archive("rar archive is invalid (bad filename size: %d)", + fname_len); + + return; + } + + /* Attrs */ + RAR_SKIP_BYTES(4); + + if (flags & 0x100) { + /* We also need to read HIGH_PACK_SIZE */ + guint32 tmp; + + RAR_READ_UINT32(tmp); + sz += tmp; + comp_sz += tmp; + /* HIGH_UNP_SIZE */ + RAR_READ_UINT32(tmp); + uncomp_sz += tmp; + } + + f = g_malloc0(sizeof(*f)); + + if (flags & 0x200) { + /* We have unicode + normal version */ + guchar *tmp; + + tmp = memchr(p, '\0', fname_len); + + if (tmp != NULL) { + /* Just use ASCII version */ + rspamd_archive_file_try_utf(task, arch, f, p, tmp - p); + msg_debug_archive("found ascii filename in rarv4 archive: %v", + f->fname); + } + else { + /* We have UTF8 filename, use it as is */ + rspamd_archive_file_try_utf(task, arch, f, p, fname_len); + msg_debug_archive("found utf filename in rarv4 archive: %v", + f->fname); + } + } + else { + rspamd_archive_file_try_utf(task, arch, f, p, fname_len); + msg_debug_archive("found ascii (old) filename in rarv4 archive: %v", + f->fname); + } + + f->compressed_size = comp_sz; + f->uncompressed_size = uncomp_sz; + + if (flags & 0x4) { + f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED; + } + + if (f->fname) { + if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) { + arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES; + } + g_ptr_array_add(arch->files, f); + } + else { + g_free(f); + } + } + + p = start_section; + RAR_SKIP_BYTES(sz); + } + +end: + part->part_type = RSPAMD_MIME_PART_ARCHIVE; + part->specific.arch = arch; + arch->size = part->parsed_data.len; +} + +static void +rspamd_archive_process_rar(struct rspamd_task *task, + struct rspamd_mime_part *part) +{ + const guchar *p, *end, *section_start; + const guchar rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00}, + rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00}; + const guint rar_encrypted_header = 4, rar_main_header = 1, + rar_file_header = 2; + guint64 vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0, + extra_sz = 0; + struct rspamd_archive *arch; + struct rspamd_archive_file *f; + gint r; + + p = part->parsed_data.begin; + end = p + part->parsed_data.len; + + if ((gsize) (end - p) <= sizeof(rar_v5_magic)) { + msg_debug_archive("rar archive is invalid (too small)"); + + return; + } + + if (memcmp(p, rar_v5_magic, sizeof(rar_v5_magic)) == 0) { + p += sizeof(rar_v5_magic); + } + else if (memcmp(p, rar_v4_magic, sizeof(rar_v4_magic)) == 0) { + p += sizeof(rar_v4_magic); + + rspamd_archive_process_rar_v4(task, p, end, part); + return; + } + else { + msg_debug_archive("rar archive is invalid (no rar magic)"); + + return; + } + + /* Rar v5 format */ + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = g_ptr_array_new(); + arch->type = RSPAMD_ARCHIVE_RAR; + if (part->cd) { + arch->archive_name = &part->cd->filename; + } + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); + + /* Now we can have either encryption header or archive header */ + /* Crc 32 */ + RAR_SKIP_BYTES(sizeof(guint32)); + /* Size */ + RAR_READ_VINT_SKIP(); + sz = vint; + /* Type */ + section_start = p; + RAR_READ_VINT_SKIP(); + type = vint; + /* Header flags */ + RAR_READ_VINT_SKIP(); + flags = vint; + + if (flags & 0x1) { + /* Have extra zone */ + RAR_READ_VINT_SKIP(); + } + if (flags & 0x2) { + /* Data zone is presented */ + RAR_READ_VINT_SKIP(); + sz += vint; + } + + if (type == rar_encrypted_header) { + /* We can't read any further information as archive is encrypted */ + arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED; + goto end; + } + else if (type != rar_main_header) { + msg_debug_archive("rar archive is invalid (bad main header)"); + + return; + } + + /* Nothing useful in main header */ + p = section_start; + RAR_SKIP_BYTES(sz); + + while (p < end) { + gboolean has_extra = FALSE; + /* Read the next header */ + /* Crc 32 */ + RAR_SKIP_BYTES(sizeof(guint32)); + /* Size */ + RAR_READ_VINT_SKIP(); + + sz = vint; + if (sz == 0) { + /* Zero sized block - error */ + msg_debug_archive("rar archive is invalid (zero size block)"); + + return; + } + + section_start = p; + /* Type */ + RAR_READ_VINT_SKIP(); + type = vint; + /* Header flags */ + RAR_READ_VINT_SKIP(); + flags = vint; + + if (flags & 0x1) { + /* Have extra zone */ + RAR_READ_VINT_SKIP(); + extra_sz = vint; + has_extra = TRUE; + } + + if (flags & 0x2) { + /* Data zone is presented */ + RAR_READ_VINT_SKIP(); + sz += vint; + comp_sz = vint; + } + + if (type != rar_file_header) { + p = section_start; + RAR_SKIP_BYTES(sz); + } + else { + /* We have a file header, go forward */ + guint64 fname_len; + bool is_directory = false; + + /* File header specific flags */ + RAR_READ_VINT_SKIP(); + flags = vint; + + /* Unpacked size */ + RAR_READ_VINT_SKIP(); + uncomp_sz = vint; + /* Attributes */ + RAR_READ_VINT_SKIP(); + + if (flags & 0x2) { + /* Unix mtime */ + RAR_SKIP_BYTES(sizeof(guint32)); + } + if (flags & 0x4) { + /* Crc32 */ + RAR_SKIP_BYTES(sizeof(guint32)); + } + if (flags & 0x1) { + /* Ignore directories for sanity purposes */ + is_directory = true; + msg_debug_archive("skip directory record in a rar archive"); + } + + if (!is_directory) { + /* Compression */ + RAR_READ_VINT_SKIP(); + /* Host OS */ + RAR_READ_VINT_SKIP(); + /* Filename length (finally!) */ + RAR_READ_VINT_SKIP(); + fname_len = vint; + + if (fname_len == 0 || fname_len > (gsize) (end - p)) { + msg_debug_archive("rar archive is invalid (bad filename size)"); + + return; + } + + f = g_malloc0(sizeof(*f)); + f->uncompressed_size = uncomp_sz; + f->compressed_size = comp_sz; + rspamd_archive_file_try_utf(task, arch, f, p, fname_len); + + if (f->fname) { + msg_debug_archive("added rarv5 file: %v", f->fname); + g_ptr_array_add(arch->files, f); + if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) { + arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES; + } + } + else { + g_free(f); + f = NULL; + } + + if (f && has_extra && extra_sz > 0 && + p + fname_len + extra_sz < end) { + /* Try to find encryption record in extra field */ + const guchar *ex = p + fname_len; + + while (ex < p + extra_sz) { + const guchar *t; + gint64 cur_sz = 0, sec_type = 0; + + r = rspamd_archive_rar_read_vint(ex, extra_sz, &cur_sz); + if (r == -1) { + msg_debug_archive("rar archive is invalid (bad vint)"); + return; + } + + t = ex + r; + + r = rspamd_archive_rar_read_vint(t, extra_sz - r, &sec_type); + if (r == -1) { + msg_debug_archive("rar archive is invalid (bad vint)"); + return; + } + + if (sec_type == 0x01) { + f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED; + arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED; + break; + } + + ex += cur_sz; + } + } + } + + /* Restore p to the beginning of the header */ + p = section_start; + RAR_SKIP_BYTES(sz); + } + } + +end: + part->part_type = RSPAMD_MIME_PART_ARCHIVE; + part->specific.arch = arch; + arch->size = part->parsed_data.len; +} + +static inline gint +rspamd_archive_7zip_read_vint(const guchar *start, gsize remain, guint64 *res) +{ + /* + * REAL_UINT64 means real UINT64. + * UINT64 means real UINT64 encoded with the following scheme: + * + * Size of encoding sequence depends from first byte: + * First_Byte Extra_Bytes Value + * (binary) + * 0xxxxxxx : ( xxxxxxx ) + * 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y + * 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y + * ... + * 1111110x BYTE y[6] : ( x << (8 * 6)) + y + * 11111110 BYTE y[7] : y + * 11111111 BYTE y[8] : y + */ + guchar t; + + if (remain == 0) { + return -1; + } + + t = *start; + + if (!isset(&t, 7)) { + /* Trivial case */ + *res = t; + return 1; + } + else if (t == 0xFF) { + if (remain >= sizeof(guint64) + 1) { + memcpy(res, start + 1, sizeof(guint64)); + *res = GUINT64_FROM_LE(*res); + + return sizeof(guint64) + 1; + } + } + else { + gint cur_bit = 6, intlen = 1; + const guchar bmask = 0xFF; + guint64 tgt; + + while (cur_bit > 0) { + if (!isset(&t, cur_bit)) { + if (remain >= intlen + 1) { + memcpy(&tgt, start + 1, intlen); + tgt = GUINT64_FROM_LE(tgt); + /* Shift back */ + tgt >>= sizeof(tgt) - NBBY * intlen; + /* Add masked value */ + tgt += (guint64) (t & (bmask >> (NBBY - cur_bit))) + << (NBBY * intlen); + *res = tgt; + + return intlen + 1; + } + } + cur_bit--; + intlen++; + } + } + + return -1; +} + +#define SZ_READ_VINT_SKIP() \ + do { \ + r = rspamd_archive_7zip_read_vint(p, end - p, &vint); \ + if (r == -1) { \ + msg_debug_archive("7z archive is invalid (bad vint)"); \ + return; \ + } \ + p += r; \ + } while (0) +#define SZ_READ_VINT(var) \ + do { \ + int r; \ + r = rspamd_archive_7zip_read_vint(p, end - p, &(var)); \ + if (r == -1) { \ + msg_debug_archive("7z archive is invalid (bad vint): %s", G_STRLOC); \ + return NULL; \ + } \ + p += r; \ + } while (0) + +#define SZ_READ_UINT64(n) \ + do { \ + if (end - p < (goffset) sizeof(guint64)) { \ + msg_debug_archive("7zip archive is invalid (bad uint64): %s", G_STRLOC); \ + return; \ + } \ + memcpy(&(n), p, sizeof(guint64)); \ + n = GUINT64_FROM_LE(n); \ + p += sizeof(guint64); \ + } while (0) +#define SZ_SKIP_BYTES(n) \ + do { \ + if (end - p >= (n)) { \ + p += (n); \ + } \ + else { \ + msg_debug_archive("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (gint) (n), (gint) (end - p), G_STRLOC); \ + return NULL; \ + } \ + } while (0) + +enum rspamd_7zip_header_mark { + kEnd = 0x00, + kHeader = 0x01, + kArchiveProperties = 0x02, + kAdditionalStreamsInfo = 0x03, + kMainStreamsInfo = 0x04, + kFilesInfo = 0x05, + kPackInfo = 0x06, + kUnPackInfo = 0x07, + kSubStreamsInfo = 0x08, + kSize = 0x09, + kCRC = 0x0A, + kFolder = 0x0B, + kCodersUnPackSize = 0x0C, + kNumUnPackStream = 0x0D, + kEmptyStream = 0x0E, + kEmptyFile = 0x0F, + kAnti = 0x10, + kName = 0x11, + kCTime = 0x12, + kATime = 0x13, + kMTime = 0x14, + kWinAttributes = 0x15, + kComment = 0x16, + kEncodedHeader = 0x17, + kStartPos = 0x18, + kDummy = 0x19, +}; + + +#define _7Z_CRYPTO_MAIN_ZIP 0x06F10101 /* Main Zip crypto algo */ +#define _7Z_CRYPTO_RAR_29 0x06F10303 /* Rar29 AES-128 + (modified SHA-1) */ +#define _7Z_CRYPTO_AES_256_SHA_256 0x06F10701 /* AES-256 + SHA-256 */ + +#define IS_SZ_ENCRYPTED(codec_id) (((codec_id) == _7Z_CRYPTO_MAIN_ZIP) || \ + ((codec_id) == _7Z_CRYPTO_RAR_29) || \ + ((codec_id) == _7Z_CRYPTO_AES_256_SHA_256)) + +static const guchar * +rspamd_7zip_read_bits(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, guint nbits, + guint *pbits_set) +{ + unsigned mask = 0, avail = 0, i; + gboolean bit_set = 0; + + for (i = 0; i < nbits; i++) { + if (mask == 0) { + avail = *p; + SZ_SKIP_BYTES(1); + mask = 0x80; + } + + bit_set = (avail & mask) ? 1 : 0; + + if (bit_set && pbits_set) { + (*pbits_set)++; + } + + mask >>= 1; + } + + return p; +} + +static const guchar * +rspamd_7zip_read_digest(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, + guint64 num_streams, + guint *pdigest_read) +{ + guchar all_defined = *p; + guint64 i; + guint num_defined = 0; + /* + * BYTE AllAreDefined + * if (AllAreDefined == 0) + * { + * for(NumStreams) + * BIT Defined + * } + * UINT32 CRCs[NumDefined] + */ + SZ_SKIP_BYTES(1); + + if (all_defined) { + num_defined = num_streams; + } + else { + if (num_streams > 8192) { + /* Gah */ + return NULL; + } + + p = rspamd_7zip_read_bits(task, p, end, arch, num_streams, &num_defined); + + if (p == NULL) { + return NULL; + } + } + + for (i = 0; i < num_defined; i++) { + SZ_SKIP_BYTES(sizeof(guint32)); + } + + if (pdigest_read) { + *pdigest_read = num_defined; + } + + return p; +} + +static const guchar * +rspamd_7zip_read_pack_info(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) +{ + guint64 pack_pos = 0, pack_streams = 0, i, cur_sz; + guint num_digests = 0; + guchar t; + /* + * UINT64 PackPos + * UINT64 NumPackStreams + * + * [] + * BYTE NID::kSize (0x09) + * UINT64 PackSizes[NumPackStreams] + * [] + * + * [] + * BYTE NID::kCRC (0x0A) + * PackStreamDigests[NumPackStreams] + * [] + * BYTE NID::kEnd + */ + + SZ_READ_VINT(pack_pos); + SZ_READ_VINT(pack_streams); + + while (p != NULL && p < end) { + t = *p; + SZ_SKIP_BYTES(1); + msg_debug_archive("7zip: read pack info %xc", t); + + switch (t) { + case kSize: + /* We need to skip pack_streams VINTS */ + for (i = 0; i < pack_streams; i++) { + SZ_READ_VINT(cur_sz); + } + break; + case kCRC: + /* CRCs are more complicated */ + p = rspamd_7zip_read_digest(task, p, end, arch, pack_streams, + &num_digests); + break; + case kEnd: + goto end; + break; + default: + p = NULL; + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); + goto end; + break; + } + } + +end: + + return p; +} + +static const guchar * +rspamd_7zip_read_folder(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, guint *pnstreams, guint *ndigests) +{ + guint64 ncoders = 0, i, j, noutstreams = 0, ninstreams = 0; + + SZ_READ_VINT(ncoders); + + for (i = 0; i < ncoders && p != NULL && p < end; i++) { + guint64 sz, tmp; + guchar t; + /* + * BYTE + * { + * 0:3 CodecIdSize + * 4: Is Complex Coder + * 5: There Are Attributes + * 6: Reserved + * 7: There are more alternative methods. (Not used anymore, must be 0). + * } + * BYTE CodecId[CodecIdSize] + * if (Is Complex Coder) + * { + * UINT64 NumInStreams; + * UINT64 NumOutStreams; + * } + * if (There Are Attributes) + * { + * UINT64 PropertiesSize + * BYTE Properties[PropertiesSize] + * } + */ + t = *p; + SZ_SKIP_BYTES(1); + sz = t & 0xF; + /* Codec ID */ + tmp = 0; + for (j = 0; j < sz; j++) { + tmp <<= 8; + tmp += p[j]; + } + + msg_debug_archive("7zip: read codec id: %L", tmp); + + if (IS_SZ_ENCRYPTED(tmp)) { + arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED; + } + + SZ_SKIP_BYTES(sz); + + if (t & (1u << 4)) { + /* Complex */ + SZ_READ_VINT(tmp); /* InStreams */ + ninstreams += tmp; + SZ_READ_VINT(tmp); /* OutStreams */ + noutstreams += tmp; + } + else { + /* XXX: is it correct ? */ + noutstreams++; + ninstreams++; + } + if (t & (1u << 5)) { + /* Attributes ... */ + SZ_READ_VINT(tmp); /* Size of attrs */ + SZ_SKIP_BYTES(tmp); + } + } + + if (noutstreams > 1) { + /* BindPairs, WTF, huh */ + for (i = 0; i < noutstreams - 1; i++) { + guint64 tmp; + + SZ_READ_VINT(tmp); + SZ_READ_VINT(tmp); + } + } + + gint64 npacked = (gint64) ninstreams - (gint64) noutstreams + 1; + msg_debug_archive("7zip: instreams=%L, outstreams=%L, packed=%L", + ninstreams, noutstreams, npacked); + + if (npacked > 1) { + /* Gah... */ + for (i = 0; i < npacked; i++) { + guint64 tmp; + + SZ_READ_VINT(tmp); + } + } + + *pnstreams = noutstreams; + (*ndigests) += npacked; + + return p; +} + +static const guchar * +rspamd_7zip_read_coders_info(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, + guint *pnum_folders, guint *pnum_nodigest) +{ + guint64 num_folders = 0, i, tmp; + guchar t; + guint *folder_nstreams = NULL, num_digests = 0, digests_read = 0; + + while (p != NULL && p < end) { + /* + * BYTE NID::kFolder (0x0B) + * UINT64 NumFolders + * BYTE External + * switch(External) + * { + * case 0: + * Folders[NumFolders] + * case 1: + * UINT64 DataStreamIndex + * } + * BYTE ID::kCodersUnPackSize (0x0C) + * for(Folders) + * for(Folder.NumOutStreams) + * UINT64 UnPackSize; + * [] + * BYTE NID::kCRC (0x0A) + * UnPackDigests[NumFolders] + * [] + * BYTE NID::kEnd + */ + + t = *p; + SZ_SKIP_BYTES(1); + msg_debug_archive("7zip: read coders info %xc", t); + + switch (t) { + case kFolder: + SZ_READ_VINT(num_folders); + msg_debug_archive("7zip: nfolders=%L", num_folders); + + if (*p != 0) { + /* External folders */ + SZ_SKIP_BYTES(1); + SZ_READ_VINT(tmp); + } + else { + SZ_SKIP_BYTES(1); + + if (num_folders > 8192) { + /* Gah */ + return NULL; + } + + if (folder_nstreams) { + g_free(folder_nstreams); + } + + folder_nstreams = g_malloc(sizeof(int) * num_folders); + + for (i = 0; i < num_folders && p != NULL && p < end; i++) { + p = rspamd_7zip_read_folder(task, p, end, arch, + &folder_nstreams[i], &num_digests); + } + } + break; + case kCodersUnPackSize: + for (i = 0; i < num_folders && p != NULL && p < end; i++) { + if (folder_nstreams) { + for (guint j = 0; j < folder_nstreams[i]; j++) { + SZ_READ_VINT(tmp); /* Unpacked size */ + msg_debug_archive("7zip: unpacked size " + "(folder=%d, stream=%d) = %L", + (gint) i, j, tmp); + } + } + else { + msg_err_task("internal 7zip error"); + } + } + break; + case kCRC: + /* + * Here are dragons. Spec tells that here there could be up + * to nfolders digests. However, according to the actual source + * code, in case of multiple out streams there should be digests + * for all out streams. + * + * In the real life (tm) it is even more idiotic: all these digests + * are in another section! But that section needs number of digests + * that are absent here. It is the most stupid thing I've ever seen + * in any file format. + * + * I hope there *WAS* some reason to do such shit... + */ + p = rspamd_7zip_read_digest(task, p, end, arch, num_digests, + &digests_read); + break; + case kEnd: + goto end; + break; + default: + p = NULL; + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); + goto end; + break; + } + } + +end: + + if (pnum_nodigest) { + *pnum_nodigest = num_digests - digests_read; + } + if (pnum_folders) { + *pnum_folders = num_folders; + } + + if (folder_nstreams) { + g_free(folder_nstreams); + } + + return p; +} + +static const guchar * +rspamd_7zip_read_substreams_info(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch, + guint num_folders, guint num_nodigest) +{ + guchar t; + guint i; + guint64 *folder_nstreams; + + if (num_folders > 8192) { + /* Gah */ + return NULL; + } + + folder_nstreams = g_alloca(sizeof(guint64) * num_folders); + memset(folder_nstreams, 0, sizeof(guint64) * num_folders); + + while (p != NULL && p < end) { + /* + * [] + * BYTE NID::kNumUnPackStream; (0x0D) + * UINT64 NumUnPackStreamsInFolders[NumFolders]; + * [] + * + * [] + * BYTE NID::kSize (0x09) + * UINT64 UnPackSizes[??] + * [] + * + * + * [] + * BYTE NID::kCRC (0x0A) + * Digests[Number of streams with unknown CRC] + * [] + + */ + t = *p; + SZ_SKIP_BYTES(1); + + msg_debug_archive("7zip: read substream info %xc", t); + + switch (t) { + case kNumUnPackStream: + for (i = 0; i < num_folders; i++) { + guint64 tmp; + + SZ_READ_VINT(tmp); + folder_nstreams[i] = tmp; + } + break; + case kCRC: + /* + * Read the comment in the rspamd_7zip_read_coders_info + */ + p = rspamd_7zip_read_digest(task, p, end, arch, num_nodigest, + NULL); + break; + case kSize: + /* + * Another brain damaged logic, but we have to support it + * as there are no ways to proceed without it. + * In fact, it is just absent in the real life... + */ + for (i = 0; i < num_folders; i++) { + for (guint j = 0; j < folder_nstreams[i]; j++) { + guint64 tmp; + + SZ_READ_VINT(tmp); /* Who cares indeed */ + } + } + break; + case kEnd: + goto end; + break; + default: + p = NULL; + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); + goto end; + break; + } + } + +end: + return p; +} + +static const guchar * +rspamd_7zip_read_main_streams_info(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) +{ + guchar t; + guint num_folders = 0, unknown_digests = 0; + + while (p != NULL && p < end) { + t = *p; + SZ_SKIP_BYTES(1); + msg_debug_archive("7zip: read main streams info %xc", t); + + /* + * + * [] + * PackInfo + * [] + + * [] + * CodersInfo + * [] + * + * [] + * SubStreamsInfo + * [] + * + * BYTE NID::kEnd + */ + switch (t) { + case kPackInfo: + p = rspamd_7zip_read_pack_info(task, p, end, arch); + break; + case kUnPackInfo: + p = rspamd_7zip_read_coders_info(task, p, end, arch, &num_folders, + &unknown_digests); + break; + case kSubStreamsInfo: + p = rspamd_7zip_read_substreams_info(task, p, end, arch, num_folders, + unknown_digests); + break; + break; + case kEnd: + goto end; + break; + default: + p = NULL; + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); + goto end; + break; + } + } + +end: + return p; +} + +static const guchar * +rspamd_7zip_read_archive_props(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) +{ + guchar proptype; + guint64 proplen; + + /* + * for (;;) + * { + * BYTE PropertyType; + * if (aType == 0) + * break; + * UINT64 PropertySize; + * BYTE PropertyData[PropertySize]; + * } + */ + + if (p != NULL) { + proptype = *p; + SZ_SKIP_BYTES(1); + + while (proptype != 0) { + SZ_READ_VINT(proplen); + + if (p + proplen < end) { + p += proplen; + } + else { + return NULL; + } + + proptype = *p; + SZ_SKIP_BYTES(1); + } + } + + return p; +} + +static GString * +rspamd_7zip_ucs2_to_utf8(struct rspamd_task *task, const guchar *p, + const guchar *end) +{ + GString *res; + goffset dest_pos = 0, src_pos = 0; + const gsize len = (end - p) / sizeof(guint16); + guint16 *up; + UChar32 wc; + UBool is_error = 0; + + res = g_string_sized_new((end - p) * 3 / 2 + sizeof(wc) + 1); + up = (guint16 *) p; + + while (src_pos < len) { + U16_NEXT(up, src_pos, len, wc); + + if (wc > 0) { + U8_APPEND(res->str, dest_pos, + res->allocated_len - 1, + wc, is_error); + } + + if (is_error) { + g_string_free(res, TRUE); + + return NULL; + } + } + + g_assert(dest_pos < res->allocated_len); + + res->len = dest_pos; + res->str[dest_pos] = '\0'; + + return res; +} + +static const guchar * +rspamd_7zip_read_files_info(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) +{ + guint64 nfiles = 0, sz, i; + guchar t, b; + struct rspamd_archive_file *fentry; + + SZ_READ_VINT(nfiles); + + for (; p != NULL && p < end;) { + t = *p; + SZ_SKIP_BYTES(1); + + msg_debug_archive("7zip: read file data type %xc", t); + + if (t == kEnd) { + goto end; + } + + /* This is SO SPECIAL, gah */ + SZ_READ_VINT(sz); + + switch (t) { + case kEmptyStream: + case kEmptyFile: + case kAnti: /* AntiFile, OMFG */ + /* We don't care about these bits */ + case kCTime: + case kATime: + case kMTime: + /* We don't care of these guys, but we still have to parse them, gah */ + if (sz > 0) { + SZ_SKIP_BYTES(sz); + } + break; + case kName: + /* The most useful part in this whole bloody format */ + b = *p; /* External flag */ + SZ_SKIP_BYTES(1); + + if (b) { + /* TODO: for the god sake, do something about external + * filenames... + */ + guint64 tmp; + + SZ_READ_VINT(tmp); + } + else { + for (i = 0; i < nfiles; i++) { + /* Zero terminated wchar_t: happy converting... */ + /* First, find terminator */ + const guchar *fend = NULL, *tp = p; + GString *res; + + while (tp < end - 1) { + if (*tp == 0 && *(tp + 1) == 0) { + fend = tp; + break; + } + + tp += 2; + } + + if (fend == NULL || fend - p == 0) { + /* Crap instead of fname */ + msg_debug_archive("bad 7zip name; %s", G_STRLOC); + goto end; + } + + res = rspamd_7zip_ucs2_to_utf8(task, p, fend); + + if (res != NULL) { + fentry = g_malloc0(sizeof(*fentry)); + fentry->fname = res; + g_ptr_array_add(arch->files, fentry); + msg_debug_archive("7zip: found file %v", res); + } + else { + msg_debug_archive("bad 7zip name; %s", G_STRLOC); + } + /* Skip zero terminating character */ + p = fend + 2; + } + } + break; + case kDummy: + case kWinAttributes: + if (sz > 0) { + SZ_SKIP_BYTES(sz); + } + break; + default: + p = NULL; + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); + goto end; + break; + } + } + +end: + return p; +} + +static const guchar * +rspamd_7zip_read_next_section(struct rspamd_task *task, + const guchar *p, const guchar *end, + struct rspamd_archive *arch) +{ + guchar t = *p; + + SZ_SKIP_BYTES(1); + + msg_debug_archive("7zip: read section %xc", t); + + switch (t) { + case kHeader: + /* We just skip byte and go further */ + break; + case kEncodedHeader: + /* + * In fact, headers are just packed, but we assume it as + * encrypted to distinguish from the normal archives + */ + msg_debug_archive("7zip: encoded header, needs to be uncompressed"); + arch->flags |= RSPAMD_ARCHIVE_CANNOT_READ; + p = NULL; /* Cannot get anything useful */ + break; + case kArchiveProperties: + p = rspamd_7zip_read_archive_props(task, p, end, arch); + break; + case kMainStreamsInfo: + p = rspamd_7zip_read_main_streams_info(task, p, end, arch); + break; + case kAdditionalStreamsInfo: + p = rspamd_7zip_read_main_streams_info(task, p, end, arch); + break; + case kFilesInfo: + p = rspamd_7zip_read_files_info(task, p, end, arch); + break; + case kEnd: + p = NULL; + msg_debug_archive("7zip: read final section"); + break; + default: + p = NULL; + msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC); + break; + } + + return p; +} + +static void +rspamd_archive_process_7zip(struct rspamd_task *task, + struct rspamd_mime_part *part) +{ + struct rspamd_archive *arch; + const guchar *start, *p, *end; + const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; + guint64 section_offset = 0, section_length = 0; + + start = part->parsed_data.begin; + p = start; + end = p + part->parsed_data.len; + + if (end - p <= sizeof(guint64) + sizeof(guint32) || + memcmp(p, sz_magic, sizeof(sz_magic)) != 0) { + msg_debug_archive("7z archive is invalid (no 7z magic)"); + + return; + } + + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = g_ptr_array_new(); + arch->type = RSPAMD_ARCHIVE_7ZIP; + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); + + /* Magic (6 bytes) + version (2 bytes) + crc32 (4 bytes) */ + p += sizeof(guint64) + sizeof(guint32); + + SZ_READ_UINT64(section_offset); + SZ_READ_UINT64(section_length); + + if (end - p > sizeof(guint32)) { + p += sizeof(guint32); + } + else { + msg_debug_archive("7z archive is invalid (truncated crc)"); + + return; + } + + if (end - p > section_offset) { + p += section_offset; + } + else { + msg_debug_archive("7z archive is invalid (incorrect section offset)"); + + return; + } + + while ((p = rspamd_7zip_read_next_section(task, p, end, arch)) != NULL) + ; + + part->part_type = RSPAMD_MIME_PART_ARCHIVE; + part->specific.arch = arch; + if (part->cd != NULL) { + arch->archive_name = &part->cd->filename; + } + arch->size = part->parsed_data.len; +} + +static void +rspamd_archive_process_gzip(struct rspamd_task *task, + struct rspamd_mime_part *part) +{ + struct rspamd_archive *arch; + const guchar *start, *p, *end; + const guchar gz_magic[] = {0x1F, 0x8B}; + guchar flags; + + start = part->parsed_data.begin; + p = start; + end = p + part->parsed_data.len; + + if (end - p <= 10 || memcmp(p, gz_magic, sizeof(gz_magic)) != 0) { + msg_debug_archive("gzip archive is invalid (no gzip magic)"); + + return; + } + + arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch)); + arch->files = g_ptr_array_sized_new(1); + arch->type = RSPAMD_ARCHIVE_GZIP; + if (part->cd) { + arch->archive_name = &part->cd->filename; + } + rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor, + arch); + + flags = p[3]; + + if (flags & (1u << 5)) { + arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED; + } + + if (flags & (1u << 3)) { + /* We have file name presented in archive, try to use it */ + if (flags & (1u << 1)) { + /* Multipart */ + p += 12; + } + else { + p += 10; + } + + if (flags & (1u << 2)) { + /* Optional section */ + guint16 optlen = 0; + + RAR_READ_UINT16(optlen); + + if (end <= p + optlen) { + msg_debug_archive("gzip archive is invalid, bad extra length: %d", + (int) optlen); + + return; + } + + p += optlen; + } + + /* Read file name */ + const guchar *fname_start = p; + + while (p < end) { + if (*p == '\0') { + if (p > fname_start) { + struct rspamd_archive_file *f; + + f = g_malloc0(sizeof(*f)); + + rspamd_archive_file_try_utf(task, arch, f, + fname_start, p - fname_start); + + if (f->fname) { + g_ptr_array_add(arch->files, f); + + if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) { + arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES; + } + } + else { + /* Invalid filename, skip */ + g_free(f); + } + + goto set; + } + } + + p++; + } + + /* Wrong filename, not zero terminated */ + msg_debug_archive("gzip archive is invalid, bad filename at pos %d", + (int) (p - start)); + + return; + } + + /* Fallback, we need to extract file name from archive name if possible */ + if (part->cd && part->cd->filename.len > 0) { + const gchar *dot_pos, *slash_pos; + + dot_pos = rspamd_memrchr(part->cd->filename.begin, '.', + part->cd->filename.len); + + if (dot_pos) { + struct rspamd_archive_file *f; + + slash_pos = rspamd_memrchr(part->cd->filename.begin, '/', + part->cd->filename.len); + + if (slash_pos && slash_pos < dot_pos) { + f = g_malloc0(sizeof(*f)); + f->fname = g_string_sized_new(dot_pos - slash_pos); + g_string_append_len(f->fname, slash_pos + 1, + dot_pos - slash_pos - 1); + + msg_debug_archive("fallback to gzip filename based on cd: %v", + f->fname); + + g_ptr_array_add(arch->files, f); + + goto set; + } + else { + const gchar *fname_start = part->cd->filename.begin; + + f = g_malloc0(sizeof(*f)); + + if (memchr(fname_start, '.', part->cd->filename.len) != dot_pos) { + /* Double dots, something like foo.exe.gz */ + f->fname = g_string_sized_new(dot_pos - fname_start); + g_string_append_len(f->fname, fname_start, + dot_pos - fname_start); + } + else { + /* Single dot, something like foo.gzz */ + f->fname = g_string_sized_new(part->cd->filename.len); + g_string_append_len(f->fname, fname_start, + part->cd->filename.len); + } + + msg_debug_archive("fallback to gzip filename based on cd: %v", + f->fname); + + g_ptr_array_add(arch->files, f); + + goto set; + } + } + } + + return; + +set: + /* Set archive data */ + part->part_type = RSPAMD_MIME_PART_ARCHIVE; + part->specific.arch = arch; + arch->size = part->parsed_data.len; +} + +static gboolean +rspamd_archive_cheat_detect(struct rspamd_mime_part *part, const gchar *str, + const guchar *magic_start, gsize magic_len) +{ + struct rspamd_content_type *ct; + const gchar *p; + rspamd_ftok_t srch, *fname; + + ct = part->ct; + RSPAMD_FTOK_ASSIGN(&srch, "application"); + + if (ct && ct->type.len && ct->subtype.len > 0 && rspamd_ftok_cmp(&ct->type, &srch) == 0) { + if (rspamd_substring_search_caseless(ct->subtype.begin, ct->subtype.len, + str, strlen(str)) != -1) { + /* We still need to check magic, see #1848 */ + if (magic_start != NULL) { + if (part->parsed_data.len > magic_len && + memcmp(part->parsed_data.begin, + magic_start, magic_len) == 0) { + return TRUE; + } + /* No magic, refuse this type of archive */ + return FALSE; + } + else { + return TRUE; + } + } + } + + if (part->cd) { + fname = &part->cd->filename; + + if (fname && fname->len > strlen(str)) { + p = fname->begin + fname->len - strlen(str); + + if (rspamd_lc_cmp(p, str, strlen(str)) == 0) { + if (*(p - 1) == '.') { + if (magic_start != NULL) { + if (part->parsed_data.len > magic_len && + memcmp(part->parsed_data.begin, + magic_start, magic_len) == 0) { + return TRUE; + } + /* No magic, refuse this type of archive */ + return FALSE; + } + + return TRUE; + } + } + } + + if (magic_start != NULL) { + if (part->parsed_data.len > magic_len && + memcmp(part->parsed_data.begin, magic_start, magic_len) == 0) { + return TRUE; + } + } + } + else { + if (magic_start != NULL) { + if (part->parsed_data.len > magic_len && + memcmp(part->parsed_data.begin, magic_start, magic_len) == 0) { + return TRUE; + } + } + } + + return FALSE; +} + +void rspamd_archives_process(struct rspamd_task *task) +{ + guint i; + struct rspamd_mime_part *part; + const guchar rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07}; + const guchar zip_magic[] = {0x50, 0x4b, 0x03, 0x04}; + const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; + const guchar gz_magic[] = {0x1F, 0x8B, 0x08}; + + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) + { + if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) { + if (part->parsed_data.len > 0) { + if (rspamd_archive_cheat_detect(part, "zip", + zip_magic, sizeof(zip_magic))) { + rspamd_archive_process_zip(task, part); + } + else if (rspamd_archive_cheat_detect(part, "rar", + rar_magic, sizeof(rar_magic))) { + rspamd_archive_process_rar(task, part); + } + else if (rspamd_archive_cheat_detect(part, "7z", + sz_magic, sizeof(sz_magic))) { + rspamd_archive_process_7zip(task, part); + } + else if (rspamd_archive_cheat_detect(part, "gz", + gz_magic, sizeof(gz_magic))) { + rspamd_archive_process_gzip(task, part); + } + + if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT) && + part->part_type == RSPAMD_MIME_PART_ARCHIVE && + part->specific.arch) { + struct rspamd_archive *arch = part->specific.arch; + + msg_info_task("found %s archive with incorrect content-type: %T/%T", + rspamd_archive_type_str(arch->type), + &part->ct->type, &part->ct->subtype); + + if (!(part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) { + part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN; + } + } + } + } + } +} + + +const gchar * +rspamd_archive_type_str(enum rspamd_archive_type type) +{ + const gchar *ret = "unknown"; + + switch (type) { + case RSPAMD_ARCHIVE_ZIP: + ret = "zip"; + break; + case RSPAMD_ARCHIVE_RAR: + ret = "rar"; + break; + case RSPAMD_ARCHIVE_7ZIP: + ret = "7z"; + break; + case RSPAMD_ARCHIVE_GZIP: + ret = "gz"; + break; + } + + return ret; +} |