summaryrefslogtreecommitdiffstats
path: root/src/archive_manager.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/archive_manager.cc')
-rw-r--r--src/archive_manager.cc406
1 files changed, 406 insertions, 0 deletions
diff --git a/src/archive_manager.cc b/src/archive_manager.cc
new file mode 100644
index 0000000..705842e
--- /dev/null
+++ b/src/archive_manager.cc
@@ -0,0 +1,406 @@
+/**
+ * Copyright (c) 2020, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @file archive_manager.cc
+ */
+
+#include <unistd.h>
+
+#include "config.h"
+
+#if HAVE_ARCHIVE_H
+# include "archive.h"
+# include "archive_entry.h"
+#endif
+
+#include "archive_manager.cfg.hh"
+#include "archive_manager.hh"
+#include "base/auto_fd.hh"
+#include "base/auto_mem.hh"
+#include "base/fs_util.hh"
+#include "base/humanize.hh"
+#include "base/injector.hh"
+#include "base/lnav_log.hh"
+#include "base/paths.hh"
+#include "fmt/format.h"
+#include "lnav_util.hh"
+
+namespace fs = ghc::filesystem;
+
+namespace archive_manager {
+
+#if HAVE_ARCHIVE_H
+/**
+ * Enables a subset of the supported archive formats to speed up detection,
+ * since some formats, like xar are unlikely to be used.
+ */
+static void
+enable_desired_archive_formats(archive* arc)
+{
+ /** @feature f0:archive.formats */
+ archive_read_support_format_7zip(arc);
+ archive_read_support_format_cpio(arc);
+ archive_read_support_format_lha(arc);
+ archive_read_support_format_rar(arc);
+ archive_read_support_format_tar(arc);
+ archive_read_support_format_zip(arc);
+}
+#endif
+
+bool
+is_archive(const fs::path& filename)
+{
+#if HAVE_ARCHIVE_H
+ auto_mem<archive> arc(archive_read_free);
+
+ arc = archive_read_new();
+
+ archive_read_support_filter_all(arc);
+ enable_desired_archive_formats(arc);
+ archive_read_support_format_raw(arc);
+ log_debug("read open %s", filename.c_str());
+ auto r = archive_read_open_filename(arc, filename.c_str(), 128 * 1024);
+ if (r == ARCHIVE_OK) {
+ struct archive_entry* entry = nullptr;
+
+ const auto* format_name = archive_format_name(arc);
+
+ log_debug("read next header %s %s", format_name, filename.c_str());
+ if (archive_read_next_header(arc, &entry) == ARCHIVE_OK) {
+ log_debug("read next done %s", filename.c_str());
+
+ static const auto RAW_FORMAT_NAME = string_fragment("raw");
+ static const auto GZ_FILTER_NAME = string_fragment("gzip");
+
+ format_name = archive_format_name(arc);
+
+ if (RAW_FORMAT_NAME == format_name) {
+ auto filter_count = archive_filter_count(arc);
+
+ if (filter_count == 1) {
+ return false;
+ }
+
+ const auto* first_filter_name = archive_filter_name(arc, 0);
+ if (filter_count == 2 && GZ_FILTER_NAME == first_filter_name) {
+ return false;
+ }
+ }
+ log_info(
+ "detected archive: %s -- %s", filename.c_str(), format_name);
+ return true;
+ }
+
+ log_info("archive read header failed: %s -- %s",
+ filename.c_str(),
+ archive_error_string(arc));
+ } else {
+ log_info("archive open failed: %s -- %s",
+ filename.c_str(),
+ archive_error_string(arc));
+ }
+#endif
+
+ return false;
+}
+
+static fs::path
+archive_cache_path()
+{
+ return lnav::paths::workdir() / "archives";
+}
+
+fs::path
+filename_to_tmp_path(const std::string& filename)
+{
+ auto fn_path = fs::path(filename);
+ auto basename = fn_path.filename().string();
+ hasher h;
+
+ h.update(basename);
+ auto fd = auto_fd(lnav::filesystem::openp(filename, O_RDONLY));
+ if (fd != -1) {
+ char buffer[1024];
+ int rc;
+
+ rc = read(fd, buffer, sizeof(buffer));
+ if (rc >= 0) {
+ h.update(buffer, rc);
+ }
+ }
+ basename = fmt::format(FMT_STRING("arc-{}-{}"), h.to_string(), basename);
+
+ return archive_cache_path() / basename;
+}
+
+#if HAVE_ARCHIVE_H
+static walk_result_t
+copy_data(const std::string& filename,
+ struct archive* ar,
+ struct archive_entry* entry,
+ struct archive* aw,
+ const fs::path& entry_path,
+ struct extract_progress* ep)
+{
+ int r;
+ const void* buff;
+ size_t size, total = 0, next_space_check = 0;
+ la_int64_t offset;
+
+ for (;;) {
+ if (total >= next_space_check) {
+ const auto& cfg = injector::get<const config&>();
+ auto tmp_space = fs::space(entry_path);
+
+ if (tmp_space.available < cfg.amc_min_free_space) {
+ return Err(fmt::format(
+ FMT_STRING("available space on disk ({}) is below the "
+ "minimum-free threshold ({}). Unable to unpack "
+ "'{}' to '{}'"),
+ humanize::file_size(tmp_space.available,
+ humanize::alignment::none),
+ humanize::file_size(cfg.amc_min_free_space,
+ humanize::alignment::none),
+ entry_path.filename().string(),
+ entry_path.parent_path().string()));
+ }
+ next_space_check += 1024 * 1024;
+ }
+
+ r = archive_read_data_block(ar, &buff, &size, &offset);
+ if (r == ARCHIVE_EOF) {
+ return Ok();
+ }
+ if (r != ARCHIVE_OK) {
+ return Err(fmt::format(
+ FMT_STRING("failed to extract '{}' from archive '{}' -- {}"),
+ archive_entry_pathname_utf8(entry),
+ filename,
+ archive_error_string(ar)));
+ }
+ r = archive_write_data_block(aw, buff, size, offset);
+ if (r != ARCHIVE_OK) {
+ return Err(fmt::format(FMT_STRING("failed to write file: {} -- {}"),
+ entry_path.string(),
+ archive_error_string(aw)));
+ }
+
+ total += size;
+ ep->ep_out_size.fetch_add(size);
+ }
+}
+
+static walk_result_t
+extract(const std::string& filename, const extract_cb& cb)
+{
+ static const int FLAGS = ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_PERM
+ | ARCHIVE_EXTRACT_ACL | ARCHIVE_EXTRACT_FFLAGS;
+
+ std::error_code ec;
+ auto tmp_path = filename_to_tmp_path(filename);
+
+ fs::create_directories(tmp_path.parent_path(), ec);
+ if (ec) {
+ return Err(fmt::format("unable to create directory: {} -- {}",
+ tmp_path.parent_path().string(),
+ ec.message()));
+ }
+
+ auto arc_lock = lnav::filesystem::file_lock(tmp_path);
+ auto lock_guard = lnav::filesystem::file_lock::guard(&arc_lock);
+ auto done_path = tmp_path;
+
+ done_path += ".done";
+
+ if (fs::exists(done_path)) {
+ size_t file_count = 0;
+ if (fs::is_directory(tmp_path)) {
+ for (const auto& entry : fs::directory_iterator(tmp_path)) {
+ (void) entry;
+ file_count += 1;
+ }
+ }
+ if (file_count > 0) {
+ fs::last_write_time(done_path, std::chrono::system_clock::now());
+ log_info("%s: archive has already been extracted!",
+ done_path.c_str());
+ return Ok();
+ }
+ log_warning("%s: archive cache has been damaged, re-extracting",
+ done_path.c_str());
+
+ fs::remove(done_path);
+ }
+
+ auto_mem<archive> arc(archive_free);
+ auto_mem<archive> ext(archive_free);
+
+ arc = archive_read_new();
+ enable_desired_archive_formats(arc);
+ archive_read_support_format_raw(arc);
+ archive_read_support_filter_all(arc);
+ ext = archive_write_disk_new();
+ archive_write_disk_set_options(ext, FLAGS);
+ archive_write_disk_set_standard_lookup(ext);
+ if (archive_read_open_filename(arc, filename.c_str(), 10240) != ARCHIVE_OK)
+ {
+ return Err(fmt::format(FMT_STRING("unable to open archive: {} -- {}"),
+ filename,
+ archive_error_string(arc)));
+ }
+
+ log_info("extracting %s to %s", filename.c_str(), tmp_path.c_str());
+ while (true) {
+ struct archive_entry* entry = nullptr;
+ auto r = archive_read_next_header(arc, &entry);
+ if (r == ARCHIVE_EOF) {
+ log_info("all done");
+ break;
+ }
+ if (r != ARCHIVE_OK) {
+ return Err(
+ fmt::format(FMT_STRING("unable to read entry header: {} -- {}"),
+ filename,
+ archive_error_string(arc)));
+ }
+
+ const auto* format_name = archive_format_name(arc);
+ auto filter_count = archive_filter_count(arc);
+
+ auto_mem<archive_entry> wentry(archive_entry_free);
+ wentry = archive_entry_clone(entry);
+ auto desired_pathname = fs::path(archive_entry_pathname(entry));
+ if (strcmp(format_name, "raw") == 0 && filter_count >= 2) {
+ desired_pathname = fs::path(filename).filename();
+ }
+ auto entry_path = tmp_path / desired_pathname;
+ auto* prog = cb(
+ entry_path,
+ archive_entry_size_is_set(entry) ? archive_entry_size(entry) : -1);
+ archive_entry_copy_pathname(wentry, entry_path.c_str());
+ auto entry_mode = archive_entry_mode(wentry);
+
+ archive_entry_set_perm(
+ wentry, S_IRUSR | (S_ISDIR(entry_mode) ? S_IXUSR | S_IWUSR : 0));
+ r = archive_write_header(ext, wentry);
+ if (r < ARCHIVE_OK) {
+ return Err(
+ fmt::format(FMT_STRING("unable to write entry: {} -- {}"),
+ entry_path.string(),
+ archive_error_string(ext)));
+ }
+
+ if (!archive_entry_size_is_set(entry) || archive_entry_size(entry) > 0)
+ {
+ TRY(copy_data(filename, arc, entry, ext, entry_path, prog));
+ }
+ r = archive_write_finish_entry(ext);
+ if (r != ARCHIVE_OK) {
+ return Err(
+ fmt::format(FMT_STRING("unable to finish entry: {} -- {}"),
+ entry_path.string(),
+ archive_error_string(ext)));
+ }
+ }
+ archive_read_close(arc);
+ archive_write_close(ext);
+
+ lnav::filesystem::create_file(done_path, O_WRONLY, 0600);
+
+ return Ok();
+}
+#endif
+
+walk_result_t
+walk_archive_files(
+ const std::string& filename,
+ const extract_cb& cb,
+ const std::function<void(const fs::path&, const fs::directory_entry&)>&
+ callback)
+{
+#if HAVE_ARCHIVE_H
+ auto tmp_path = filename_to_tmp_path(filename);
+
+ auto result = extract(filename, cb);
+ if (result.isErr()) {
+ fs::remove_all(tmp_path);
+ return result;
+ }
+
+ for (const auto& entry : fs::recursive_directory_iterator(tmp_path)) {
+ if (!entry.is_regular_file()) {
+ continue;
+ }
+
+ callback(tmp_path, entry);
+ }
+
+ return Ok();
+#else
+ return Err(std::string("not compiled with libarchive"));
+#endif
+}
+
+void
+cleanup_cache()
+{
+ (void) std::async(std::launch::async, []() {
+ auto now = std::chrono::system_clock::now();
+ auto cache_path = archive_cache_path();
+ const auto& cfg = injector::get<const config&>();
+ std::vector<fs::path> to_remove;
+
+ log_debug("cache-ttl %d", cfg.amc_cache_ttl.count());
+ for (const auto& entry : fs::directory_iterator(cache_path)) {
+ if (entry.path().extension() != ".done") {
+ continue;
+ }
+
+ auto mtime = fs::last_write_time(entry.path());
+ auto exp_time = mtime + cfg.amc_cache_ttl;
+ if (now < exp_time) {
+ continue;
+ }
+
+ to_remove.emplace_back(entry.path());
+ }
+
+ for (auto& entry : to_remove) {
+ log_debug("removing cached archive: %s", entry.c_str());
+ fs::remove(entry);
+
+ entry.replace_extension(".lck");
+ fs::remove(entry);
+
+ entry.replace_extension();
+ fs::remove_all(entry);
+ }
+ });
+}
+
+} // namespace archive_manager