summaryrefslogtreecommitdiffstats
path: root/stream/stream_libarchive.c
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org>, 2024-04-15 20:36:56 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>, 2024-04-15 20:36:56 +0000
commit: 51de1d8436100f725f3576aefa24a2bd2057bc28 (patch)
tree: c6d1d5264b6d40a8d7ca34129f36b7d61e188af3 /stream/stream_libarchive.c
parent: Initial commit. (diff)
download: mpv-51de1d8436100f725f3576aefa24a2bd2057bc28.tar.xz
download: mpv-51de1d8436100f725f3576aefa24a2bd2057bc28.zip
Adding upstream version 0.37.0. (tag: upstream/0.37.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'stream/stream_libarchive.c')
-rw-r--r-- stream/stream_libarchive.c 623
1 files changed, 623 insertions, 0 deletions
diff --git a/stream/stream_libarchive.c b/stream/stream_libarchive.c
new file mode 100644
index 0000000..ff2d512
--- /dev/null
+++ b/stream/stream_libarchive.c
@@ -0,0 +1,623 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <archive.h>
+#include <archive_entry.h>
+
+#include "misc/bstr.h"
+#include "common/common.h"
+#include "misc/thread_tools.h"
+#include "stream.h"
+
+#include "stream_libarchive.h"
+
+// File-private flags, allocated upwards from the MP_ARCHIVE_FLAG_PRIV bit.
+// mp_archive_probe() sets them from the leading bytes of the primary stream.
+// MAYBE_ZIP: a zip-style "PK" signature was seen.
+#define MP_ARCHIVE_FLAG_MAYBE_ZIP (MP_ARCHIVE_FLAG_PRIV << 0)
+// MAYBE_RAR: the RAR marker was seen.
+#define MP_ARCHIVE_FLAG_MAYBE_RAR (MP_ARCHIVE_FLAG_PRIV << 1)
+// MAYBE_VOLUMES: the RAR main header flags suggest a multi-volume archive.
+#define MP_ARCHIVE_FLAG_MAYBE_VOLUMES (MP_ARCHIVE_FLAG_PRIV << 2)
+
+// State for one archive volume. An instance is registered with libarchive as
+// the callback data of that volume (see add_volume()) and is what the
+// read/seek/skip/open/close callbacks receive as their priv argument.
+struct mp_archive_volume {
+ // Owning archive; also the talloc parent of this struct.
+ struct mp_archive *mpa;
+ int index; // volume number (starting with 0, mp_archive.primary_src)
+ struct stream *src; // NULL => not current volume, or 0 sized dummy stream
+ // Pending lazy seek target, applied by volume_seek(); negative => none.
+ int64_t seek_to;
+ // URL handed to stream_create() when open_cb() opens the volume.
+ char *url;
+};
+
+// Return true if the first 6 bytes peeked from s are the RAR marker
+// 'R' 'a' 'r' '!' 0x1a 0x07. A short peek counts as "not RAR".
+static bool probe_rar(struct stream *s)
+{
+    static uint8_t rar_sig[] = {0x52, 0x61, 0x72, 0x21, 0x1a, 0x07};
+    uint8_t head[sizeof(rar_sig)];
+    bool have_head = stream_read_peek(s, head, sizeof(head)) == sizeof(head);
+    return have_head && memcmp(head, rar_sig, sizeof(rar_sig)) == 0;
+}
+
+// Peek at the first 14 bytes of s and report whether bit 0x100 is set in the
+// flags of what looks like a RAR main head. The caller treats a true result
+// as "probably a multi-volume archive". Returns false on a short peek or if
+// the expected header layout is not found.
+static bool probe_multi_rar(struct stream *s)
+{
+    uint8_t hdr[14];
+    if (stream_read_peek(s, hdr, sizeof(hdr)) != sizeof(hdr))
+        return false;
+
+    // Look for rar mark head & main head (assume they're in order): byte 6
+    // must be 0, and the block starting at offset 7 must have type 0x73.
+    const uint8_t *main_head = hdr + 7;
+    if (hdr[6] != 0x00 || main_head[2] != 0x73)
+        return false;
+
+    // 16-bit little-endian flags field of the main head.
+    int rflags = main_head[3] | (main_head[4] << 8);
+    return rflags & 0x100;
+}
+
+// Return true if the first 4 bytes peeked from s look like a zip signature:
+// "PK" followed by one of a fixed set of byte pairs. A short peek counts as
+// "not zip".
+static bool probe_zip(struct stream *s)
+{
+    // Byte pairs accepted after the leading "PK".
+    // Lifted from libarchive, BSD license.
+    static const uint8_t tails[][2] = {
+        {'\001', '\002'},
+        {'\003', '\004'},
+        {'\005', '\006'},
+        {'\006', '\006'},
+        {'\007', '\010'},
+        {'0', '0'},
+    };
+
+    uint8_t sig[4];
+    if (stream_read_peek(s, sig, sizeof(sig)) != sizeof(sig))
+        return false;
+    if (sig[0] != 'P' || sig[1] != 'K')
+        return false;
+
+    for (size_t i = 0; i < sizeof(tails) / sizeof(tails[0]); i++) {
+        if (sig[2] == tails[i][0] && sig[3] == tails[i][1])
+            return true;
+    }
+    return false;
+}
+
+// Inspect the start of src and return the matching MP_ARCHIVE_FLAG_MAYBE_*
+// bits (0 if nothing was recognized). src must be positioned at offset 0.
+static int mp_archive_probe(struct stream *src)
+{
+    assert(stream_tell(src) == 0);
+
+    int detected = probe_zip(src) ? MP_ARCHIVE_FLAG_MAYBE_ZIP : 0;
+    if (!probe_rar(src))
+        return detected;
+
+    // The multi-volume check is only attempted once the RAR marker matched.
+    detected |= MP_ARCHIVE_FLAG_MAYBE_RAR;
+    if (probe_multi_rar(src))
+        detected |= MP_ARCHIVE_FLAG_MAYBE_VOLUMES;
+    return detected;
+}
+
+// Carry out the seek recorded in vol->seek_to, if any, and clear it.
+// Returns true when there was nothing to do (no stream, or no pending seek),
+// otherwise the result of stream_seek().
+static bool volume_seek(struct mp_archive_volume *vol)
+{
+    bool pending = vol->src && vol->seek_to >= 0;
+    if (!pending)
+        return true;
+
+    int64_t target = vol->seek_to;
+    vol->seek_to = -1;
+    return stream_seek(vol->src, target);
+}
+
+// libarchive read callback. Applies any pending lazy seek, then reads into
+// the archive's shared buffer and hands that buffer back through *buffer.
+// Returns the number of bytes read, 0 for a volume without a stream (or when
+// the read reports a negative result), and -1 if the pending seek failed.
+static ssize_t read_cb(struct archive *arch, void *priv, const void **buffer)
+{
+    struct mp_archive_volume *vol = priv;
+    if (!vol->src)
+        return 0;
+    if (!volume_seek(vol))
+        return -1;
+
+    struct mp_archive *mpa = vol->mpa;
+    int got = stream_read_partial(vol->src, mpa->buffer, sizeof(mpa->buffer));
+    *buffer = mpa->buffer;
+    return got < 0 ? 0 : got;
+}
+
+// libarchive seek callback.
+// lazy seek to avoid problems with end seeking over http: the target is only
+// recorded in vol->seek_to here and applied by volume_seek() on the next
+// read/skip. Returns the recorded target, 0 for a volume without a stream,
+// or -1 for an unknown whence or when the stream size is unavailable.
+static int64_t seek_cb(struct archive *arch, void *priv,
+                       int64_t offset, int whence)
+{
+    struct mp_archive_volume *vol = priv;
+    if (!vol->src)
+        return 0;
+
+    if (whence == SEEK_SET) {
+        vol->seek_to = offset;
+    } else if (whence == SEEK_CUR) {
+        // Relative to a still-pending target if there is one, otherwise to
+        // the stream's current position.
+        if (vol->seek_to < 0)
+            vol->seek_to = stream_tell(vol->src);
+        vol->seek_to += offset;
+    } else if (whence == SEEK_END) {
+        int64_t size = stream_get_size(vol->src);
+        if (size < 0)
+            return -1;
+        vol->seek_to = size + offset;
+    } else {
+        return -1;
+    }
+    return vol->seek_to;
+}
+
+// libarchive skip callback. Applies any pending lazy seek, then skips forward
+// by request bytes. Returns how far the stream position actually moved,
+// request itself for a volume without a stream, or -1 if the pending seek
+// failed.
+static int64_t skip_cb(struct archive *arch, void *priv, int64_t request)
+{
+    struct mp_archive_volume *vol = priv;
+    if (!vol->src)
+        return request;
+    if (!volume_seek(vol))
+        return -1;
+
+    struct stream *src = vol->src;
+    int64_t start = stream_tell(src);
+    stream_seek_skip(src, start + request);
+    return stream_tell(src) - start;
+}
+
+// libarchive open callback for one volume. Resets the lazy seek state; a
+// volume that already has a stream is rewound, otherwise the volume's URL is
+// opened. Returns ARCHIVE_FATAL only if rewinding fails; a volume that cannot
+// be opened is reported as ARCHIVE_OK and subsequently acts as an empty one.
+static int open_cb(struct archive *arch, void *priv)
+{
+    struct mp_archive_volume *vol = priv;
+    struct mp_archive *mpa = vol->mpa;
+    vol->seek_to = -1;
+
+    if (vol->src) {
+        // just rewind the primary stream
+        return stream_seek(vol->src, 0) ? ARCHIVE_OK : ARCHIVE_FATAL;
+    }
+
+    // Avoid annoying warnings/latency for known dummy volumes.
+    if (vol->index >= mpa->num_volumes)
+        return ARCHIVE_OK;
+
+    MP_INFO(mpa, "Opening volume '%s'...\n", vol->url);
+    struct stream *primary = mpa->primary_src;
+    vol->src = stream_create(vol->url,
+                             STREAM_READ | primary->stream_origin,
+                             primary->cancel,
+                             primary->global);
+    // We pretend that failure to open a stream means it was not found,
+    // we assume in turn means that the volume doesn't exist (since
+    // libarchive builds volumes as some sort of abstraction on top of its
+    // stream layer, and its rar code cannot access volumes or signal
+    // anything related to this). libarchive also encounters a fatal error
+    // when a volume could not be opened. However, due to the way volume
+    // support works, it is fine with 0-sized volumes, which we simulate
+    // whenever vol->src==NULL for an opened volume.
+    if (!vol->src) {
+        // Remember the cut-off so later volumes are not attempted again.
+        mpa->num_volumes = MPMIN(mpa->num_volumes, vol->index);
+        MP_INFO(mpa, "Assuming the volume above was not needed.\n");
+    }
+    return ARCHIVE_OK;
+}
+
+// Free the stream of a secondary volume and clear the pointer. Volumes
+// without a stream, and the volume backed by the primary stream, are left
+// untouched.
+static void volume_close(struct mp_archive_volume *vol)
+{
+    // don't close the primary stream
+    if (!vol->src || vol->src == vol->mpa->primary_src)
+        return;
+
+    free_stream(vol->src);
+    vol->src = NULL;
+}
+
+// libarchive close callback: release the volume's stream via volume_close().
+// Always reports ARCHIVE_OK.
+static int close_cb(struct archive *arch, void *priv)
+{
+    volume_close((struct mp_archive_volume *)priv);
+    return ARCHIVE_OK;
+}
+
+// Close and free the libarchive handle of mpa, if there is one, and clear
+// mpa->arch. Accepts NULL and an already closed archive.
+static void mp_archive_close(struct mp_archive *mpa)
+{
+    if (!mpa || !mpa->arch)
+        return;
+
+    archive_read_close(mpa->arch);
+    archive_read_free(mpa->arch);
+    mpa->arch = NULL;
+}
+
+// Supposedly we're not allowed to continue reading on FATAL returns. Otherwise
+// crashes and other UB is possible. Assume calling the close/free functions is
+// still ok. Return true if it was fatal and the archive was closed.
+// r is a libarchive return code; anything at or below ARCHIVE_FATAL is fatal.
+static bool mp_archive_check_fatal(struct mp_archive *mpa, int r)
+{
+    bool fatal = r <= ARCHIVE_FATAL;
+    if (fatal) {
+        MP_FATAL(mpa, "fatal error received - closing archive\n");
+        mp_archive_close(mpa);
+    }
+    return fatal;
+}
+
+// Destroy an archive: close the libarchive handle, release the locale object
+// and free mpa itself. mpa may be NULL.
+void mp_archive_free(struct mp_archive *mpa)
+{
+    mp_archive_close(mpa);
+    if (mpa) {
+        if (mpa->locale)
+            freelocale(mpa->locale);
+    }
+    talloc_free(mpa);
+}
+
+// Allocate a volume descriptor (owned by mpa) and register it with libarchive
+// as callback data. src may be NULL for a volume that is opened later from
+// url; url is copied. Returns true if libarchive accepted the callback data.
+static bool add_volume(struct mp_archive *mpa, struct stream *src,
+                       const char* url, int index)
+{
+    struct mp_archive_volume *vol = talloc_zero(mpa, struct mp_archive_volume);
+    vol->mpa = mpa;
+    vol->src = src;
+    vol->index = index;
+    vol->url = talloc_strdup(vol, url);
+
+    // The libarchive call is made with the archive's own locale active.
+    locale_t saved = uselocale(mpa->locale);
+    int rc = archive_read_append_callback_data(mpa->arch, vol);
+    uselocale(saved);
+    return rc == ARCHIVE_OK;
+}
+
+// Build a volume URL by formatting (base, index) with format. The format is
+// expected to consume a "%.*s" for base followed by one integer. The result
+// is allocated with ctx as talloc parent.
+static char *standard_volume_url(void *ctx, const char *format,
+                                 struct bstr base, int index)
+{
+    char *url = talloc_asprintf(ctx, format, BSTR_P(base), index);
+    return url;
+}
+
+// Build an old-style RAR volume URL: format receives base, a letter that
+// starts at 'r' and advances once per 100 volumes, and the index modulo 100.
+// The result is allocated with ctx as talloc parent.
+static char *old_rar_volume_url(void *ctx, const char *format,
+                                struct bstr base, int index)
+{
+    int letter = 'r' + index / 100;
+    int number = index % 100;
+    return talloc_asprintf(ctx, format, BSTR_P(base), letter, number);
+}
+
+// Describes one naming scheme for multi-volume archives; used by
+// find_volumes() to derive the URLs of the follow-up volumes.
+struct file_pattern {
+ // Suffix the primary URL must end with for this pattern to apply.
+ const char *match;
+ // printf-style format passed to volume_url together with the base URL.
+ const char *format;
+ // Builds the URL of volume `index` from the URL with `match` stripped.
+ char *(*volume_url)(void *ctx, const char *format,
+ struct bstr base, int index);
+ // First and last index (both inclusive) that find_volumes() generates.
+ int start;
+ int stop;
+ // If set, the pattern is only used when probing flagged
+ // MP_ARCHIVE_FLAG_MAYBE_VOLUMES.
+ bool legacy;
+};
+
+// Known volume naming schemes. find_volumes() walks the table in order and
+// uses the first entry whose `match` suffix fits the primary URL; the entry
+// with a NULL match terminates the table.
+static const struct file_pattern patterns[] = {
+ { ".part1.rar", "%.*s.part%.1d.rar", standard_volume_url, 2, 9 },
+ { ".part01.rar", "%.*s.part%.2d.rar", standard_volume_url, 2, 99 },
+ { ".part001.rar", "%.*s.part%.3d.rar", standard_volume_url, 2, 999 },
+ { ".part0001.rar", "%.*s.part%.4d.rar", standard_volume_url, 2, 9999 },
+ // Old naming (.rar, .r00, .r01, ...); marked legacy, see struct file_pattern.
+ { ".rar", "%.*s.%c%.2d", old_rar_volume_url, 0, 99, true },
+ { ".001", "%.*s.%.3d", standard_volume_url, 2, 9999 },
+ { NULL, NULL, NULL, 0, 0 },
+};
+
+// Register the follow-up volumes of a multi-volume archive with libarchive.
+//
+// The primary URL is matched against the `patterns` table; if a pattern
+// applies, one (not yet opened) volume is added for every index in the
+// pattern's range. Legacy patterns are only honoured when probing set
+// MP_ARCHIVE_FLAG_MAYBE_VOLUMES in flags.
+//
+// Returns false only if registering a volume failed; "no pattern applies" is
+// a success.
+static bool find_volumes(struct mp_archive *mpa, int flags)
+{
+    struct bstr primary_url = bstr0(mpa->primary_src->url);
+
+    const struct file_pattern *pattern = patterns;
+    while (pattern->match) {
+        if (bstr_endswith0(primary_url, pattern->match))
+            break;
+        pattern++;
+    }
+
+    if (!pattern->match)
+        return true;
+    if (pattern->legacy && !(flags & MP_ARCHIVE_FLAG_MAYBE_VOLUMES))
+        return true;
+
+    // Primary URL with the matched suffix removed.
+    struct bstr base = bstr_splice(primary_url, 0, -(int)strlen(pattern->match));
+    for (int i = pattern->start; i <= pattern->stop; i++) {
+        char *url = pattern->volume_url(mpa, pattern->format, base, i);
+
+        // add_volume() keeps its own copy of the URL, so drop the temporary
+        // immediately instead of leaving thousands of strings attached to
+        // mpa for the whole lifetime of the archive.
+        bool added = add_volume(mpa, NULL, url, i + 1);
+        talloc_free(url);
+        if (!added)
+            return false;
+    }
+
+    MP_WARN(mpa, "This appears to be a multi-volume archive.\n"
+            "Support is not very good due to libarchive limitations.\n"
+            "There are known cases of libarchive crashing mpv on these.\n"
+            "This is also an excessively inefficient and stupid way to distribute\n"
+            "media files. People creating them should rethink this.\n");
+
+    return true;
+}
+
+// Create and open an archive reader on top of src without probing it.
+//
+// log:         log context stored in the new object.
+// src:         primary stream; registered as volume 0.
+// flags:       MP_ARCHIVE_FLAG_* bits, including the private MAYBE_* bits.
+// max_volumes: upper bound stored in num_volumes; 0 means INT_MAX.
+//
+// Returns the new archive, or NULL on failure. On failure everything created
+// here is released through mp_archive_free().
+static struct mp_archive *mp_archive_new_raw(struct mp_log *log,
+ struct stream *src,
+ int flags, int max_volumes)
+{
+ struct mp_archive *mpa = talloc_zero(NULL, struct mp_archive);
+ mpa->log = log;
+ // Prefer a UTF-8 C locale for the libarchive calls; fall back to the
+ // environment's locale if "C.UTF-8" cannot be created.
+ mpa->locale = newlocale(LC_CTYPE_MASK, "C.UTF-8", (locale_t)0);
+ if (!mpa->locale) {
+ mpa->locale = newlocale(LC_CTYPE_MASK, "", (locale_t)0);
+ if (!mpa->locale)
+ goto err;
+ }
+ mpa->arch = archive_read_new();
+ mpa->primary_src = src;
+ if (!mpa->arch)
+ goto err;
+
+ mpa->flags = flags;
+ mpa->num_volumes = max_volumes ? max_volumes : INT_MAX;
+
+ // first volume is the primary stream
+ if (!add_volume(mpa, src, src->url, 0))
+ goto err;
+
+ if (!(flags & MP_ARCHIVE_FLAG_NO_VOLUMES)) {
+ // try to open other volumes
+ if (!find_volumes(mpa, flags))
+ goto err;
+ }
+
+ // Everything up to archive_read_open1() runs under the archive's locale.
+ locale_t oldlocale = uselocale(mpa->locale);
+
+ archive_read_support_format_rar(mpa->arch);
+ archive_read_support_format_rar5(mpa->arch);
+
+ // Exclude other formats if it's probably RAR, because other formats may
+ // behave suboptimal with multiple volumes exposed, such as opening every
+ // single volume by seeking at the end of the file.
+ if (!(flags & MP_ARCHIVE_FLAG_MAYBE_RAR)) {
+ archive_read_support_format_7zip(mpa->arch);
+ archive_read_support_format_iso9660(mpa->arch);
+ archive_read_support_filter_bzip2(mpa->arch);
+ archive_read_support_filter_gzip(mpa->arch);
+ archive_read_support_filter_xz(mpa->arch);
+ archive_read_support_format_zip_streamable(mpa->arch);
+
+ // This zip reader is normally preferable. However, it seeks to the end
+ // of the file, which may be annoying (HTTP reconnect, volume skipping),
+ // so use it only as last resort, or if it's relatively likely that it's
+ // really zip.
+ if (flags & (MP_ARCHIVE_FLAG_UNSAFE | MP_ARCHIVE_FLAG_MAYBE_ZIP))
+ archive_read_support_format_zip_seekable(mpa->arch);
+ }
+
+ archive_read_set_read_callback(mpa->arch, read_cb);
+ archive_read_set_skip_callback(mpa->arch, skip_cb);
+ archive_read_set_open_callback(mpa->arch, open_cb);
+ // Allow it to close a volume.
+ archive_read_set_close_callback(mpa->arch, close_cb);
+ // The seek callback is only offered when the primary stream is seekable.
+ if (mpa->primary_src->seekable)
+ archive_read_set_seek_callback(mpa->arch, seek_cb);
+ bool fail = archive_read_open1(mpa->arch) < ARCHIVE_OK;
+
+ uselocale(oldlocale);
+
+ if (fail)
+ goto err;
+
+ return mpa;
+
+err:
+ mp_archive_free(mpa);
+ return NULL;
+}
+
+// Probe src for known archive signatures, then create the archive with the
+// probe results merged into flags. src must be at position 0 (asserted by the
+// probe). Returns NULL on failure.
+struct mp_archive *mp_archive_new(struct mp_log *log, struct stream *src,
+                                  int flags, int max_volumes)
+{
+    int probed = mp_archive_probe(src);
+    return mp_archive_new_raw(log, src, flags | probed, max_volumes);
+}
+
+// Iterate entries. The first call establishes the first entry. Returns false
+// if no entry found, otherwise returns true and sets mpa->entry/entry_filename.
+// Only regular files are returned; other entry types are skipped. The search
+// also ends (returning false) on cancellation, at the end of the archive, or
+// on a header read error worse than a warning.
+bool mp_archive_next_entry(struct mp_archive *mpa)
+{
+    // Forget whatever the previous call returned.
+    mpa->entry = NULL;
+    talloc_free(mpa->entry_filename);
+    mpa->entry_filename = NULL;
+
+    if (!mpa->arch)
+        return false;
+
+    locale_t saved = uselocale(mpa->locale);
+    bool found = false;
+
+    while (!found && !mp_cancel_test(mpa->primary_src->cancel)) {
+        struct archive_entry *entry;
+        int r = archive_read_next_header(mpa->arch, &entry);
+        if (r == ARCHIVE_EOF)
+            break;
+        if (r < ARCHIVE_OK)
+            MP_ERR(mpa, "%s\n", archive_error_string(mpa->arch));
+        if (r < ARCHIVE_WARN) {
+            MP_FATAL(mpa, "could not read archive entry\n");
+            mp_archive_check_fatal(mpa, r);
+            break;
+        }
+        if (archive_entry_filetype(entry) != AE_IFREG)
+            continue;
+
+        // Some archives may have no filenames, or libarchive won't return some.
+        // Names that are missing or not valid UTF-8 get a generated stand-in.
+        const char *fn = archive_entry_pathname(entry);
+        char fallback[64];
+        bool usable = fn && bstr_validate_utf8(bstr0(fn)) >= 0;
+        if (!usable) {
+            snprintf(fallback, sizeof(fallback), "mpv_unknown#%d", mpa->entry_num);
+            fn = fallback;
+        }
+
+        mpa->entry = entry;
+        mpa->entry_filename = talloc_strdup(mpa, fn);
+        mpa->entry_num += 1;
+        found = true;
+    }
+
+    uselocale(saved);
+
+    return found;
+}
+
+// Per-stream state of an "archive://" entry stream.
+struct priv {
+ // Currently opened archive, positioned on the wanted entry; NULL after a
+ // fatal error or when the entry could not be found.
+ struct mp_archive *mpa;
+ // Set once archive_seek_data() failed; from then on seeking is emulated by
+ // reopening the archive and reading forward.
+ bool broken_seek;
+ // Underlying stream containing the archive; freed in archive_entry_close().
+ struct stream *src;
+ // Size of the entry as reported by libarchive, or -1 if it is not set.
+ int64_t entry_size;
+ // Name of the wanted entry; points into a talloc copy of the stream path.
+ char *entry_name;
+};
+
+// (Re)open the archive from the start and advance to the entry named
+// p->entry_name, resetting s->pos to 0. On the first call the source is
+// probed; later calls reuse the flags and volume count of the archive being
+// replaced. Returns STREAM_OK with p->entry_size updated, or STREAM_ERROR
+// (with p->mpa == NULL) if the archive cannot be opened or the entry is not
+// found.
+static int reopen_archive(stream_t *s)
+{
+    struct priv *p = s->priv;
+    s->pos = 0;
+
+    if (p->mpa) {
+        int flags = p->mpa->flags;
+        int num_volumes = p->mpa->num_volumes;
+        mp_archive_free(p->mpa);
+        p->mpa = mp_archive_new_raw(s->log, p->src, flags, num_volumes);
+    } else {
+        p->mpa = mp_archive_new(s->log, p->src, MP_ARCHIVE_FLAG_UNSAFE, 0);
+    }
+
+    if (!p->mpa)
+        return STREAM_ERROR;
+
+    // Follows the same logic as demux_libarchive.c.
+    struct mp_archive *mpa = p->mpa;
+    while (mp_archive_next_entry(mpa)) {
+        if (strcmp(p->entry_name, mpa->entry_filename) != 0)
+            continue;
+
+        locale_t saved = uselocale(mpa->locale);
+        p->entry_size = -1;
+        if (archive_entry_size_is_set(mpa->entry))
+            p->entry_size = archive_entry_size(mpa->entry);
+        uselocale(saved);
+        return STREAM_OK;
+    }
+
+    mp_archive_free(p->mpa);
+    p->mpa = NULL;
+    MP_ERR(s, "archive entry not found. '%s'\n", p->entry_name);
+    return STREAM_ERROR;
+}
+
+// Stream fill_buffer callback: read up to max_len bytes of the current entry
+// into buffer.
+//
+// Returns the result of archive_read_data() (bytes read, 0 at the end of the
+// entry, negative on error), or 0 if no archive is open. After a fatal
+// libarchive error the archive is destroyed and p->mpa is reset to NULL.
+static int archive_entry_fill_buffer(stream_t *s, void *buffer, int max_len)
+{
+    struct priv *p = s->priv;
+    if (!p->mpa)
+        return 0;
+    locale_t oldlocale = uselocale(p->mpa->locale);
+    int r = archive_read_data(p->mpa->arch, buffer, max_len);
+    if (r < 0)
+        MP_ERR(s, "%s\n", archive_error_string(p->mpa->arch));
+    // Switch back before a possible mp_archive_free(): that call releases the
+    // archive's locale object, which must not still be the thread's current
+    // locale at that point. archive_entry_seek() uses the same order.
+    uselocale(oldlocale);
+    if (r < 0 && mp_archive_check_fatal(p->mpa, r)) {
+        mp_archive_free(p->mpa);
+        p->mpa = NULL;
+    }
+    return r;
+}
+
+// Stream seek callback: position the entry stream at newpos.
+//
+// Native seeking through archive_seek_data() is tried first. If that fails
+// once, the stream permanently switches to emulation: backward seeks reopen
+// the archive and start over, forward seeks read and discard data.
+//
+// Returns 1 on success and -1 on failure (reopen failed, cancelled, read
+// error, or end of entry reached before newpos).
+static int archive_entry_seek(stream_t *s, int64_t newpos)
+{
+    struct priv *p = s->priv;
+    if (p->mpa && !p->broken_seek) {
+        locale_t oldlocale = uselocale(p->mpa->locale);
+        // archive_seek_data() returns a 64-bit value. Storing it in an int
+        // would truncate results of 2 GiB and above, which could turn a
+        // successful seek into a negative number and trigger the slow reopen
+        // fallback below.
+        int64_t r = archive_seek_data(p->mpa->arch, newpos, SEEK_SET);
+        uselocale(oldlocale);
+        if (r >= 0)
+            return 1;
+        MP_WARN(s, "possibly unsupported seeking - switching to reopening\n");
+        p->broken_seek = true;
+        // Reopening resets s->pos to 0, so the forward-read path below takes
+        // over from the start of the entry.
+        if (reopen_archive(s) < STREAM_OK)
+            return -1;
+    }
+    // libarchive can't seek in most formats.
+    if (newpos < s->pos) {
+        // Hack seeking backwards into working by reopening the archive and
+        // starting over.
+        MP_VERBOSE(s, "trying to reopen archive for performing seek\n");
+        if (reopen_archive(s) < STREAM_OK)
+            return -1;
+    }
+    if (newpos > s->pos) {
+        if (!p->mpa && reopen_archive(s) < STREAM_OK)
+            return -1;
+        // For seeking forwards, just keep reading data (there's no libarchive
+        // skip function either).
+        char buffer[4096];
+        while (newpos > s->pos) {
+            if (mp_cancel_test(s->cancel))
+                return -1;
+
+            int size = MPMIN(newpos - s->pos, sizeof(buffer));
+            locale_t oldlocale = uselocale(p->mpa->locale);
+            int r = archive_read_data(p->mpa->arch, buffer, size);
+            if (r <= 0) {
+                if (r == 0 && newpos > p->entry_size) {
+                    MP_ERR(s, "demuxer trying to seek beyond end of archive "
+                           "entry\n");
+                } else if (r == 0) {
+                    MP_ERR(s, "end of archive entry reached while seeking\n");
+                } else {
+                    MP_ERR(s, "%s\n", archive_error_string(p->mpa->arch));
+                }
+                // Restore the locale before the archive (and its locale
+                // object) may be freed.
+                uselocale(oldlocale);
+                if (mp_archive_check_fatal(p->mpa, r)) {
+                    mp_archive_free(p->mpa);
+                    p->mpa = NULL;
+                }
+                return -1;
+            }
+            uselocale(oldlocale);
+            s->pos += r;
+        }
+    }
+    return 1;
+}
+
+// Stream close callback: destroy the archive first, then free the underlying
+// source stream.
+static void archive_entry_close(stream_t *s)
+{
+    struct priv *p = s->priv;
+    struct mp_archive *mpa = p->mpa;
+    struct stream *src = p->src;
+
+    mp_archive_free(mpa);
+    free_stream(src);
+}
+
+// Stream get_size callback: report the entry size recorded by
+// reopen_archive() (-1 if libarchive did not provide one).
+static int64_t archive_entry_get_size(stream_t *s)
+{
+    return ((struct priv *)s->priv)->entry_size;
+}
+
+// Open callback of the "archive" protocol. The stream path has the form
+// "<url of archive>|<entry name>"; the part before '|' is URL-unescaped and
+// opened as the source stream, the part after it (minus one leading '/')
+// selects the entry. Returns STREAM_OK, or an error code if the path has no
+// '|', the source cannot be opened, or the entry cannot be located.
+static int archive_entry_open(stream_t *stream)
+{
+    struct priv *p = talloc_zero(stream, struct priv);
+    stream->priv = p;
+
+    if (!strchr(stream->path, '|'))
+        return STREAM_ERROR;
+
+    // Split a private copy of the path at the first '|'.
+    char *base = talloc_strdup(p, stream->path);
+    char *sep = strchr(base, '|');
+    if (!sep)
+        return STREAM_ERROR;
+    *sep = '\0';
+
+    char *name = sep + 1;
+    if (*name == '/')
+        name++;
+    p->entry_name = name;
+    mp_url_unescape_inplace(base);
+
+    p->src = stream_create(base, STREAM_READ | stream->stream_origin,
+                           stream->cancel, stream->global);
+    if (!p->src) {
+        archive_entry_close(stream);
+        return STREAM_ERROR;
+    }
+
+    int r = reopen_archive(stream);
+    if (r < STREAM_OK) {
+        archive_entry_close(stream);
+        return r;
+    }
+
+    // Seeking is only offered if the underlying source can seek.
+    if (p->src->seekable) {
+        stream->seek = archive_entry_seek;
+        stream->seekable = true;
+    }
+    stream->fill_buffer = archive_entry_fill_buffer;
+    stream->get_size = archive_entry_get_size;
+    stream->close = archive_entry_close;
+    stream->streaming = true;
+
+    return STREAM_OK;
+}
+
+// Stream implementation descriptor: handles "archive" protocol URLs, opened
+// through archive_entry_open().
+const stream_info_t stream_info_libarchive = {
+ .name = "libarchive",
+ .open = archive_entry_open,
+ .protocols = (const char*const[]){ "archive", NULL },
+};