summaryrefslogtreecommitdiffstats
path: root/src/lib-fs/fs-api.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib-fs/fs-api.c1407
1 files changed, 1407 insertions, 0 deletions
diff --git a/src/lib-fs/fs-api.c b/src/lib-fs/fs-api.c
new file mode 100644
index 0000000..1b47ded
--- /dev/null
+++ b/src/lib-fs/fs-api.c
@@ -0,0 +1,1407 @@
+/* Copyright (c) 2010-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "module-dir.h"
+#include "llist.h"
+#include "str.h"
+#include "hash-method.h"
+#include "istream.h"
+#include "istream-seekable.h"
+#include "ostream.h"
+#include "stats-dist.h"
+#include "time-util.h"
+#include "istream-fs-stats.h"
+#include "fs-api-private.h"
+
+static struct event_category event_category_fs = {
+ .name = "fs"
+};
+
+struct fs_api_module_register fs_api_module_register = { 0 };
+
+static struct module *fs_modules = NULL;
+static ARRAY(const struct fs *) fs_classes;
+
+static void fs_classes_init(void);
+
+static struct event *fs_create_event(struct fs *fs, struct event *parent)
+{
+ struct event *event;
+
+ event = event_create(parent);
+ event_add_category(event, &event_category_fs);
+ event_set_append_log_prefix(event,
+ t_strdup_printf("fs-%s: ", fs->name));
+ return event;
+}
+
+static int
+fs_alloc(const struct fs *fs_class, const char *args,
+ const struct fs_settings *set, struct fs **fs_r, const char **error_r)
+{
+ struct fs *fs;
+ const char *error;
+ int ret;
+
+ fs = fs_class->v.alloc();
+ fs->refcount = 1;
+ fs->set.debug = set->debug;
+ fs->set.enable_timing = set->enable_timing;
+ i_array_init(&fs->module_contexts, 5);
+ fs->event = fs_create_event(fs, set->event_parent);
+ event_set_forced_debug(fs->event, fs->set.debug);
+
+ T_BEGIN {
+ ret = fs_class->v.init(fs, args, set, &error);
+ } T_END_PASS_STR_IF(ret < 0, &error);
+ if (ret < 0) {
+ /* a bit kludgy way to allow data stack frame usage in normal
+ conditions but still be able to return error message from
+ data stack. */
+ *error_r = t_strdup_printf("%s: %s", fs_class->name, error);
+ fs_unref(&fs);
+ return -1;
+ }
+ fs->username = i_strdup(set->username);
+ fs->session_id = i_strdup(set->session_id);
+ *fs_r = fs;
+ return 0;
+}
+
+void fs_class_register(const struct fs *fs_class)
+{
+ if (!array_is_created(&fs_classes))
+ fs_classes_init();
+ array_push_back(&fs_classes, &fs_class);
+}
+
+static void fs_classes_deinit(void)
+{
+ array_free(&fs_classes);
+}
+
+static void fs_classes_init(void)
+{
+ i_array_init(&fs_classes, 8);
+ fs_class_register(&fs_class_dict);
+ fs_class_register(&fs_class_posix);
+ fs_class_register(&fs_class_randomfail);
+ fs_class_register(&fs_class_metawrap);
+ fs_class_register(&fs_class_sis);
+ fs_class_register(&fs_class_sis_queue);
+ fs_class_register(&fs_class_test);
+ lib_atexit(fs_classes_deinit);
+}
+
+static const struct fs *fs_class_find(const char *driver)
+{
+ const struct fs *class;
+
+ if (!array_is_created(&fs_classes))
+ fs_classes_init();
+
+ array_foreach_elem(&fs_classes, class) {
+ if (strcmp(class->name, driver) == 0)
+ return class;
+ }
+ return NULL;
+}
+
+static void fs_class_deinit_modules(void)
+{
+ module_dir_unload(&fs_modules);
+}
+
+static const char *fs_driver_module_name(const char *driver)
+{
+ return t_str_replace(driver, '-', '_');
+}
+
+static void fs_class_try_load_plugin(const char *driver)
+{
+ const char *module_name =
+ t_strdup_printf("fs_%s", fs_driver_module_name(driver));
+ struct module *module;
+ struct module_dir_load_settings mod_set;
+ const struct fs *fs_class;
+
+ i_zero(&mod_set);
+ mod_set.abi_version = DOVECOT_ABI_VERSION;
+ mod_set.ignore_missing = TRUE;
+
+ fs_modules = module_dir_load_missing(fs_modules, MODULE_DIR,
+ module_name, &mod_set);
+ module_dir_init(fs_modules);
+
+ module = module_dir_find(fs_modules, module_name);
+ fs_class = module == NULL ? NULL :
+ module_get_symbol(module, t_strdup_printf(
+ "fs_class_%s", fs_driver_module_name(driver)));
+ if (fs_class != NULL)
+ fs_class_register(fs_class);
+
+ lib_atexit(fs_class_deinit_modules);
+}
+
+int fs_init(const char *driver, const char *args,
+ const struct fs_settings *set,
+ struct fs **fs_r, const char **error_r)
+{
+ const struct fs *fs_class;
+ const char *temp_file_prefix;
+
+ fs_class = fs_class_find(driver);
+ if (fs_class == NULL) {
+ T_BEGIN {
+ fs_class_try_load_plugin(driver);
+ } T_END;
+ fs_class = fs_class_find(driver);
+ }
+ if (fs_class == NULL) {
+ *error_r = t_strdup_printf("Unknown fs driver: %s", driver);
+ return -1;
+ }
+ if (fs_alloc(fs_class, args, set, fs_r, error_r) < 0)
+ return -1;
+ event_set_ptr((*fs_r)->event, FS_EVENT_FIELD_FS, *fs_r);
+
+ temp_file_prefix = set->temp_file_prefix != NULL ?
+ set->temp_file_prefix : ".temp.dovecot";
+ if(set->temp_dir == NULL)
+ (*fs_r)->temp_path_prefix = i_strconcat("/tmp/",
+ temp_file_prefix, NULL);
+ else
+ (*fs_r)->temp_path_prefix = i_strconcat(set->temp_dir, "/",
+ temp_file_prefix, NULL);
+ return 0;
+}
+
+int fs_init_from_string(const char *str, const struct fs_settings *set,
+ struct fs **fs_r, const char **error_r)
+{
+ const char *args = strpbrk(str, " :");
+ if (args == NULL)
+ args = "";
+ else
+ str = t_strdup_until(str, args++);
+ return fs_init(str, args, set, fs_r, error_r);
+}
+
+void fs_deinit(struct fs **fs)
+{
+ fs_unref(fs);
+}
+
+void fs_ref(struct fs *fs)
+{
+ i_assert(fs->refcount > 0);
+
+ fs->refcount++;
+}
+
+void fs_unref(struct fs **_fs)
+{
+ struct fs *fs = *_fs;
+ struct array module_contexts_arr;
+ unsigned int i;
+
+ if (fs == NULL)
+ return;
+
+ module_contexts_arr = fs->module_contexts.arr;
+
+ i_assert(fs->refcount > 0);
+
+ *_fs = NULL;
+
+ if (--fs->refcount > 0)
+ return;
+
+ if (fs->files_open_count > 0) {
+ i_panic("fs-%s: %u files still open (first = %s)",
+ fs->name, fs->files_open_count, fs_file_path(fs->files));
+ }
+ i_assert(fs->files == NULL);
+
+ if (fs->v.deinit != NULL)
+ fs->v.deinit(fs);
+
+ fs_deinit(&fs->parent);
+ event_unref(&fs->event);
+ i_free(fs->username);
+ i_free(fs->session_id);
+ i_free(fs->temp_path_prefix);
+ for (i = 0; i < FS_OP_COUNT; i++) {
+ if (fs->stats.timings[i] != NULL)
+ stats_dist_deinit(&fs->stats.timings[i]);
+ }
+ T_BEGIN {
+ fs->v.free(fs);
+ } T_END;
+ array_free_i(&module_contexts_arr);
+}
+
+struct fs *fs_get_parent(struct fs *fs)
+{
+ return fs->parent;
+}
+
+const char *fs_get_driver(struct fs *fs)
+{
+ return fs->name;
+}
+
+const char *fs_get_root_driver(struct fs *fs)
+{
+ while (fs->parent != NULL)
+ fs = fs->parent;
+ return fs->name;
+}
+
+struct fs_file *fs_file_init(struct fs *fs, const char *path, int mode_flags)
+{
+ return fs_file_init_with_event(fs, fs->event, path, mode_flags);
+}
+
+struct fs_file *fs_file_init_with_event(struct fs *fs, struct event *event,
+ const char *path, int mode_flags)
+{
+ struct fs_file *file;
+
+ i_assert(path != NULL);
+ i_assert((mode_flags & FS_OPEN_FLAG_ASYNC_NOQUEUE) == 0 ||
+ (mode_flags & FS_OPEN_FLAG_ASYNC) != 0);
+
+ T_BEGIN {
+ file = fs->v.file_alloc();
+ file->fs = fs;
+ file->flags = mode_flags & ENUM_NEGATE(FS_OPEN_MODE_MASK);
+ file->event = fs_create_event(fs, event);
+ event_set_ptr(file->event, FS_EVENT_FIELD_FS, fs);
+ event_set_ptr(file->event, FS_EVENT_FIELD_FILE, file);
+ fs->v.file_init(file, path, mode_flags & FS_OPEN_MODE_MASK,
+ mode_flags & ENUM_NEGATE(FS_OPEN_MODE_MASK));
+ } T_END;
+
+ fs->files_open_count++;
+ DLLIST_PREPEND(&fs->files, file);
+
+ fs_set_metadata(file, FS_METADATA_ORIG_PATH, path);
+ return file;
+}
+
+void fs_file_deinit(struct fs_file **_file)
+{
+ struct fs_file *file = *_file;
+
+ if (file == NULL)
+ return;
+
+ i_assert(file->fs->files_open_count > 0);
+
+ *_file = NULL;
+
+ fs_file_close(file);
+
+ DLLIST_REMOVE(&file->fs->files, file);
+ file->fs->files_open_count--;
+ T_BEGIN {
+ file->fs->v.file_deinit(file);
+ } T_END;
+}
+
+void fs_file_free(struct fs_file *file)
+{
+ if (file->last_error_changed) {
+ /* fs_set_error() used without ever accessing it via
+ fs_file_last_error(). Log it to make sure it's not lost.
+ Note that the errors are always set only to the file at
+ the root of the parent hierarchy. */
+ e_error(file->event, "%s (in file %s deinit)",
+ file->last_error, fs_file_path(file));
+ }
+
+ fs_file_deinit(&file->parent);
+ event_unref(&file->event);
+ pool_unref(&file->metadata_pool);
+ i_free(file->last_error);
+}
+
+void fs_file_set_flags(struct fs_file *file,
+ enum fs_open_flags add_flags,
+ enum fs_open_flags remove_flags)
+{
+ file->flags |= add_flags;
+ file->flags &= ENUM_NEGATE(remove_flags);
+
+ if (file->parent != NULL)
+ fs_file_set_flags(file->parent, add_flags, remove_flags);
+}
+
+void fs_file_close(struct fs_file *file)
+{
+ if (file == NULL)
+ return;
+
+ i_assert(!file->writing_stream);
+ i_assert(file->output == NULL);
+
+ if (file->pending_read_input != NULL)
+ i_stream_unref(&file->pending_read_input);
+ if (file->seekable_input != NULL)
+ i_stream_unref(&file->seekable_input);
+
+ if (file->copy_input != NULL) {
+ i_stream_unref(&file->copy_input);
+ fs_write_stream_abort_error(file, &file->copy_output, "fs_file_close(%s)",
+ o_stream_get_name(file->copy_output));
+ }
+ i_free_and_null(file->write_digest);
+ if (file->fs->v.file_close != NULL) T_BEGIN {
+ file->fs->v.file_close(file);
+ } T_END;
+
+ /* check this only after closing, because some of the fs backends keep
+ the istream internally open and don't call the destroy-callback
+ until after file_close() */
+ i_assert(!file->istream_open);
+}
+
+enum fs_properties fs_get_properties(struct fs *fs)
+{
+ return fs->v.get_properties(fs);
+}
+
+void fs_metadata_init(struct fs_file *file)
+{
+ if (file->metadata_pool == NULL) {
+ i_assert(!array_is_created(&file->metadata));
+ file->metadata_pool = pool_alloconly_create("fs metadata", 1024);
+ p_array_init(&file->metadata, file->metadata_pool, 8);
+ }
+}
+
+void fs_metadata_init_or_clear(struct fs_file *file)
+{
+ if (file->metadata_pool == NULL)
+ fs_metadata_init(file);
+ else T_BEGIN {
+ const struct fs_metadata *md;
+ ARRAY_TYPE(fs_metadata) internal_metadata;
+
+ t_array_init(&internal_metadata, 4);
+ array_foreach(&file->metadata, md) {
+ if (strncmp(md->key, FS_METADATA_INTERNAL_PREFIX,
+ strlen(FS_METADATA_INTERNAL_PREFIX)) == 0)
+ array_push_back(&internal_metadata, md);
+ }
+ array_clear(&file->metadata);
+ array_append_array(&file->metadata, &internal_metadata);
+ } T_END;
+}
+
+static struct fs_metadata *
+fs_metadata_find_md(const ARRAY_TYPE(fs_metadata) *metadata,
+ const char *key)
+{
+ struct fs_metadata *md;
+
+ array_foreach_modifiable(metadata, md) {
+ if (strcmp(md->key, key) == 0)
+ return md;
+ }
+ return NULL;
+}
+
+void fs_default_set_metadata(struct fs_file *file,
+ const char *key, const char *value)
+{
+ struct fs_metadata *metadata;
+
+ fs_metadata_init(file);
+ metadata = fs_metadata_find_md(&file->metadata, key);
+ if (metadata == NULL) {
+ metadata = array_append_space(&file->metadata);
+ metadata->key = p_strdup(file->metadata_pool, key);
+ }
+ metadata->value = p_strdup(file->metadata_pool, value);
+}
+
+const char *fs_metadata_find(const ARRAY_TYPE(fs_metadata) *metadata,
+ const char *key)
+{
+ const struct fs_metadata *md;
+
+ if (!array_is_created(metadata))
+ return NULL;
+
+ md = fs_metadata_find_md(metadata, key);
+ return md == NULL ? NULL : md->value;
+}
+
+void fs_set_metadata(struct fs_file *file, const char *key, const char *value)
+{
+ i_assert(key != NULL);
+ i_assert(value != NULL);
+
+ if (file->fs->v.set_metadata != NULL) T_BEGIN {
+ file->fs->v.set_metadata(file, key, value);
+ if (strncmp(key, FS_METADATA_INTERNAL_PREFIX,
+ strlen(FS_METADATA_INTERNAL_PREFIX)) == 0) {
+ /* internal metadata change, which isn't stored. */
+ } else {
+ file->metadata_changed = TRUE;
+ }
+ } T_END;
+}
+
+static void fs_file_timing_start(struct fs_file *file, enum fs_op op)
+{
+ if (!file->fs->set.enable_timing)
+ return;
+ if (file->timing_start[op].tv_sec == 0)
+ i_gettimeofday(&file->timing_start[op]);
+}
+
+static void
+fs_timing_end(struct stats_dist **timing, const struct timeval *start_tv)
+{
+ struct timeval now;
+ long long diff;
+
+ i_gettimeofday(&now);
+
+ diff = timeval_diff_usecs(&now, start_tv);
+ if (diff > 0) {
+ if (*timing == NULL)
+ *timing = stats_dist_init();
+ stats_dist_add(*timing, diff);
+ }
+}
+
+void fs_file_timing_end(struct fs_file *file, enum fs_op op)
+{
+ if (!file->fs->set.enable_timing || file->timing_start[op].tv_sec == 0)
+ return;
+
+ fs_timing_end(&file->fs->stats.timings[op], &file->timing_start[op]);
+ /* don't count this again */
+ file->timing_start[op].tv_sec = 0;
+}
+
+int fs_get_metadata_full(struct fs_file *file,
+ enum fs_get_metadata_flags flags,
+ const ARRAY_TYPE(fs_metadata) **metadata_r)
+{
+ int ret;
+
+ if (file->fs->v.get_metadata == NULL) {
+ if (array_is_created(&file->metadata)) {
+ /* Return internal metadata. */
+ *metadata_r = &file->metadata;
+ return 0;
+ }
+ fs_set_error(file->event, ENOTSUP, "Metadata not supported by backend");
+ return -1;
+ }
+ if (!file->read_or_prefetch_counted &&
+ !file->lookup_metadata_counted) {
+ if ((flags & FS_GET_METADATA_FLAG_LOADED_ONLY) == 0) {
+ file->lookup_metadata_counted = TRUE;
+ file->fs->stats.lookup_metadata_count++;
+ }
+ fs_file_timing_start(file, FS_OP_METADATA);
+ }
+ T_BEGIN {
+ ret = file->fs->v.get_metadata(file, flags, metadata_r);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN))
+ fs_file_timing_end(file, FS_OP_METADATA);
+ return ret;
+}
+
+int fs_get_metadata(struct fs_file *file,
+ const ARRAY_TYPE(fs_metadata) **metadata_r)
+{
+ return fs_get_metadata_full(file, 0, metadata_r);
+}
+
+int fs_lookup_metadata(struct fs_file *file, const char *key,
+ const char **value_r)
+{
+ const ARRAY_TYPE(fs_metadata) *metadata;
+
+ if (fs_get_metadata(file, &metadata) < 0)
+ return -1;
+ *value_r = fs_metadata_find(metadata, key);
+ return *value_r != NULL ? 1 : 0;
+}
+
+const char *fs_lookup_loaded_metadata(struct fs_file *file, const char *key)
+{
+ const ARRAY_TYPE(fs_metadata) *metadata;
+
+ if (fs_get_metadata_full(file, FS_GET_METADATA_FLAG_LOADED_ONLY, &metadata) < 0)
+ i_panic("FS_GET_METADATA_FLAG_LOADED_ONLY lookup can't fail");
+ return fs_metadata_find(metadata, key);
+}
+
+const char *fs_file_path(struct fs_file *file)
+{
+ return file->fs->v.get_path == NULL ? file->path :
+ file->fs->v.get_path(file);
+}
+
+struct fs *fs_file_fs(struct fs_file *file)
+{
+ return file->fs;
+}
+
+struct event *fs_file_event(struct fs_file *file)
+{
+ return file->event;
+}
+
+static struct fs_file *fs_file_get_error_file(struct fs_file *file)
+{
+ /* the error is always kept in the parentmost file */
+ while (file->parent != NULL)
+ file = file->parent;
+ return file;
+}
+
+static void ATTR_FORMAT(2, 0)
+fs_set_verror(struct event *event, const char *fmt, va_list args)
+{
+ struct event *fs_event = event;
+ struct fs_file *file;
+ struct fs_iter *iter;
+
+ /* NOTE: the event might be a passthrough event. We must log it exactly
+ once so it gets freed. */
+
+ /* figure out if the error is for a file or iter */
+ while ((file = event_get_ptr(fs_event, FS_EVENT_FIELD_FILE)) == NULL &&
+ (iter = event_get_ptr(fs_event, FS_EVENT_FIELD_ITER)) == NULL) {
+ fs_event = event_get_parent(fs_event);
+ i_assert(fs_event != NULL);
+ }
+
+ char *new_error = i_strdup_vprintf(fmt, args);
+ /* Don't flood the debug log with "Asynchronous operation in progress"
+ messages. They tell nothing useful. */
+ if (errno != EAGAIN)
+ e_debug(event, "%s", new_error);
+ else
+ event_send_abort(event);
+
+ /* free old error after strdup in case args point to the old error */
+ if (file != NULL) {
+ file = fs_file_get_error_file(file);
+ char *old_error = file->last_error;
+
+ if (old_error == NULL) {
+ i_assert(!file->last_error_changed);
+ } else if (strcmp(old_error, new_error) == 0) {
+ /* identical error - ignore */
+ } else if (file->last_error_changed) {
+ /* multiple fs_set_error() calls used without
+ fs_file_last_error() in the middle. */
+ e_error(file->event, "%s (overwriting error for file %s)",
+ old_error, fs_file_path(file));
+ }
+ if (errno == EAGAIN || errno == ENOENT || errno == EEXIST ||
+ errno == ENOTEMPTY) {
+ /* These are (or can be) expected errors - don't log
+ them if they have a missing fs_file_last_error()
+ call */
+ file->last_error_changed = FALSE;
+ } else {
+ file->last_error_changed = TRUE;
+ }
+
+ i_free(file->last_error);
+ file->last_error = new_error;
+ } else {
+ i_assert(iter != NULL);
+ if (iter->last_error != NULL &&
+ strcmp(iter->last_error, new_error) == 0) {
+ /* identical error - ignore */
+ } else if (iter->last_error != NULL) {
+ /* multiple fs_set_error() calls before the iter
+ finishes */
+ e_error(iter->fs->event, "%s (overwriting error for file %s)",
+ iter->last_error, iter->path);
+ }
+ i_free(iter->last_error);
+ iter->last_error = new_error;
+ }
+}
+
+const char *fs_file_last_error(struct fs_file *file)
+{
+ struct fs_file *error_file = fs_file_get_error_file(file);
+
+ error_file->last_error_changed = FALSE;
+ if (error_file->last_error == NULL)
+ return "BUG: Unknown file error";
+ return error_file->last_error;
+}
+
+bool fs_prefetch(struct fs_file *file, uoff_t length)
+{
+ bool ret;
+
+ if (!file->read_or_prefetch_counted) {
+ file->read_or_prefetch_counted = TRUE;
+ file->fs->stats.prefetch_count++;
+ fs_file_timing_start(file, FS_OP_PREFETCH);
+ }
+ T_BEGIN {
+ ret = file->fs->v.prefetch(file, length);
+ } T_END;
+ fs_file_timing_end(file, FS_OP_PREFETCH);
+ return ret;
+}
+
+ssize_t fs_read_via_stream(struct fs_file *file, void *buf, size_t size)
+{
+ const unsigned char *data;
+ size_t data_size;
+ ssize_t ret;
+
+ i_assert(size > 0);
+
+ if (file->pending_read_input == NULL)
+ file->pending_read_input = fs_read_stream(file, size+1);
+ ret = i_stream_read_bytes(file->pending_read_input, &data,
+ &data_size, size);
+ if (ret == 0) {
+ fs_file_set_error_async(file);
+ return -1;
+ }
+ if (ret < 0 && file->pending_read_input->stream_errno != 0) {
+ fs_set_error(file->event,
+ file->pending_read_input->stream_errno,
+ "read(%s) failed: %s",
+ i_stream_get_name(file->pending_read_input),
+ i_stream_get_error(file->pending_read_input));
+ } else {
+ ret = I_MIN(size, data_size);
+ memcpy(buf, data, ret);
+ }
+ i_stream_unref(&file->pending_read_input);
+ return ret;
+}
+
+ssize_t fs_read(struct fs_file *file, void *buf, size_t size)
+{
+ int ret;
+
+ if (!file->read_or_prefetch_counted) {
+ file->read_or_prefetch_counted = TRUE;
+ file->fs->stats.read_count++;
+ fs_file_timing_start(file, FS_OP_READ);
+ }
+
+ if (file->fs->v.read != NULL) {
+ T_BEGIN {
+ ret = file->fs->v.read(file, buf, size);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN))
+ fs_file_timing_end(file, FS_OP_READ);
+ return ret;
+ }
+
+ /* backend didn't bother to implement read(), but we can do it with
+ streams. */
+ return fs_read_via_stream(file, buf, size);
+}
+
+static void fs_file_istream_destroyed(struct fs_file *file)
+{
+ i_assert(file->istream_open);
+
+ file->istream_open = FALSE;
+}
+
+struct istream *fs_read_stream(struct fs_file *file, size_t max_buffer_size)
+{
+ struct istream *input, *inputs[2];
+ const unsigned char *data;
+ size_t size;
+ ssize_t ret;
+ bool want_seekable = FALSE;
+
+ if (!file->read_or_prefetch_counted) {
+ file->read_or_prefetch_counted = TRUE;
+ file->fs->stats.read_count++;
+ fs_file_timing_start(file, FS_OP_READ);
+ }
+
+ if (file->seekable_input != NULL) {
+ /* allow multiple open streams, each in a different position */
+ input = i_stream_create_limit(file->seekable_input, UOFF_T_MAX);
+ i_stream_seek(input, 0);
+ return input;
+ }
+ i_assert(!file->istream_open);
+ T_BEGIN {
+ input = file->fs->v.read_stream(file, max_buffer_size);
+ } T_END;
+ if (input->stream_errno != 0) {
+ /* read failed already */
+ fs_file_timing_end(file, FS_OP_READ);
+ return input;
+ }
+ if (file->fs->set.enable_timing) {
+ struct istream *input2 = i_stream_create_fs_stats(input, file);
+
+ i_stream_unref(&input);
+ input = input2;
+ }
+
+ if ((file->flags & FS_OPEN_FLAG_SEEKABLE) != 0)
+ want_seekable = TRUE;
+ else if ((file->flags & FS_OPEN_FLAG_ASYNC) == 0 && !input->blocking)
+ want_seekable = TRUE;
+
+ if (want_seekable && !input->seekable) {
+ /* need to make the stream seekable */
+ inputs[0] = input;
+ inputs[1] = NULL;
+ input = i_stream_create_seekable_path(inputs, max_buffer_size,
+ file->fs->temp_path_prefix);
+ i_stream_set_name(input, i_stream_get_name(inputs[0]));
+ i_stream_unref(&inputs[0]);
+ }
+ file->seekable_input = input;
+ i_stream_ref(file->seekable_input);
+
+ if ((file->flags & FS_OPEN_FLAG_ASYNC) == 0 && !input->blocking) {
+ /* read the whole input stream before returning */
+ while ((ret = i_stream_read_more(input, &data, &size)) >= 0) {
+ i_stream_skip(input, size);
+ if (ret == 0)
+ fs_wait_async(file->fs);
+ }
+ i_stream_seek(input, 0);
+ }
+ file->istream_open = TRUE;
+ i_stream_add_destroy_callback(input, fs_file_istream_destroyed, file);
+ return input;
+}
+
+int fs_write_via_stream(struct fs_file *file, const void *data, size_t size)
+{
+ struct ostream *output;
+ ssize_t ret;
+ int err;
+
+ if (!file->write_pending) {
+ output = fs_write_stream(file);
+ if ((ret = o_stream_send(output, data, size)) < 0) {
+ err = errno;
+ fs_write_stream_abort_error(file, &output, "fs_write(%s) failed: %s",
+ o_stream_get_name(output),
+ o_stream_get_error(output));
+ errno = err;
+ return -1;
+ }
+ i_assert((size_t)ret == size);
+ ret = fs_write_stream_finish(file, &output);
+ } else {
+ ret = fs_write_stream_finish_async(file);
+ }
+ if (ret == 0) {
+ fs_file_set_error_async(file);
+ file->write_pending = TRUE;
+ return -1;
+ }
+ file->write_pending = FALSE;
+ return ret < 0 ? -1 : 0;
+}
+
+int fs_write(struct fs_file *file, const void *data, size_t size)
+{
+ int ret;
+
+ if (file->fs->v.write != NULL) {
+ fs_file_timing_start(file, FS_OP_WRITE);
+ T_BEGIN {
+ ret = file->fs->v.write(file, data, size);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN)) {
+ file->fs->stats.write_count++;
+ file->fs->stats.write_bytes += size;
+ fs_file_timing_end(file, FS_OP_WRITE);
+ }
+ return ret;
+ }
+
+ /* backend didn't bother to implement write(), but we can do it with
+ streams. */
+ return fs_write_via_stream(file, data, size);
+}
+
+struct ostream *fs_write_stream(struct fs_file *file)
+{
+ i_assert(!file->writing_stream);
+ i_assert(file->output == NULL);
+
+ file->writing_stream = TRUE;
+ file->fs->stats.write_count++;
+ T_BEGIN {
+ file->fs->v.write_stream(file);
+ } T_END;
+ i_assert(file->output != NULL);
+ o_stream_cork(file->output);
+ return file->output;
+}
+
+static int fs_write_stream_finish_int(struct fs_file *file, bool success)
+{
+ int ret;
+
+ i_assert(file->writing_stream);
+
+ fs_file_timing_start(file, FS_OP_WRITE);
+ T_BEGIN {
+ ret = file->fs->v.write_stream_finish(file, success);
+ } T_END;
+ if (ret != 0) {
+ fs_file_timing_end(file, FS_OP_WRITE);
+ file->metadata_changed = FALSE;
+ } else {
+ /* write didn't finish yet. this shouldn't happen if we
+ indicated a failure. */
+ i_assert(success);
+ }
+ if (ret != 0) {
+ i_assert(file->output == NULL);
+ file->writing_stream = FALSE;
+ }
+ return ret;
+}
+
+int fs_write_stream_finish(struct fs_file *file, struct ostream **output)
+{
+ bool success = TRUE;
+ int ret;
+
+ i_assert(*output == file->output || *output == NULL);
+ i_assert(output != &file->output);
+
+ *output = NULL;
+ if (file->output != NULL) {
+ o_stream_uncork(file->output);
+ if ((ret = o_stream_finish(file->output)) <= 0) {
+ i_assert(ret < 0);
+ fs_set_error(file->event, file->output->stream_errno,
+ "write(%s) failed: %s",
+ o_stream_get_name(file->output),
+ o_stream_get_error(file->output));
+ success = FALSE;
+ }
+ file->fs->stats.write_bytes += file->output->offset;
+ }
+ return fs_write_stream_finish_int(file, success);
+}
+
+int fs_write_stream_finish_async(struct fs_file *file)
+{
+ return fs_write_stream_finish_int(file, TRUE);
+}
+
+static void fs_write_stream_abort(struct fs_file *file, struct ostream **output)
+{
+ int ret;
+
+ i_assert(*output == file->output);
+ i_assert(file->output != NULL);
+ i_assert(output != &file->output);
+
+ *output = NULL;
+ o_stream_abort(file->output);
+ /* make sure we don't have an old error lying around */
+ ret = fs_write_stream_finish_int(file, FALSE);
+ i_assert(ret != 0);
+}
+
+void fs_write_stream_abort_error(struct fs_file *file, struct ostream **output, const char *error_fmt, ...)
+{
+ va_list args;
+ va_start(args, error_fmt);
+ fs_set_verror(file->event, error_fmt, args);
+ /* the error shouldn't be automatically logged if
+ fs_file_last_error() is no longer used */
+ fs_file_get_error_file(file)->last_error_changed = FALSE;
+ fs_write_stream_abort(file, output);
+ va_end(args);
+}
+
+void fs_write_stream_abort_parent(struct fs_file *file, struct ostream **output)
+{
+ i_assert(file->parent != NULL);
+ i_assert(fs_file_last_error(file->parent) != NULL);
+ fs_write_stream_abort(file->parent, output);
+}
+
+void fs_write_set_hash(struct fs_file *file, const struct hash_method *method,
+ const void *digest)
+{
+ file->write_digest_method = method;
+
+ i_free(file->write_digest);
+ file->write_digest = i_malloc(method->digest_size);
+ memcpy(file->write_digest, digest, method->digest_size);
+}
+
+#undef fs_file_set_async_callback
+void fs_file_set_async_callback(struct fs_file *file,
+ fs_file_async_callback_t *callback,
+ void *context)
+{
+ if (file->fs->v.set_async_callback != NULL)
+ file->fs->v.set_async_callback(file, callback, context);
+ else
+ callback(context);
+}
+
+void fs_wait_async(struct fs *fs)
+{
+ /* recursion not allowed */
+ i_assert(fs->prev_ioloop == NULL);
+
+ if (fs->v.wait_async != NULL) T_BEGIN {
+ fs->prev_ioloop = current_ioloop;
+ fs->v.wait_async(fs);
+ i_assert(current_ioloop == fs->prev_ioloop);
+ fs->prev_ioloop = NULL;
+ } T_END;
+}
+
+bool fs_switch_ioloop(struct fs *fs)
+{
+ bool ret = FALSE;
+
+ if (fs->v.switch_ioloop != NULL) {
+ T_BEGIN {
+ ret = fs->v.switch_ioloop(fs);
+ } T_END;
+ } else if (fs->parent != NULL) {
+ ret = fs_switch_ioloop(fs->parent);
+ }
+ return ret;
+}
+
+int fs_lock(struct fs_file *file, unsigned int secs, struct fs_lock **lock_r)
+{
+ int ret;
+
+ T_BEGIN {
+ ret = file->fs->v.lock(file, secs, lock_r);
+ } T_END;
+ return ret;
+}
+
+void fs_unlock(struct fs_lock **_lock)
+{
+ struct fs_lock *lock = *_lock;
+
+ if (lock == NULL)
+ return;
+
+ *_lock = NULL;
+ T_BEGIN {
+ lock->file->fs->v.unlock(lock);
+ } T_END;
+}
+
+int fs_exists(struct fs_file *file)
+{
+ struct stat st;
+ int ret;
+
+ if (file->fs->v.exists == NULL) {
+ /* fallback to stat() */
+ if (fs_stat(file, &st) == 0)
+ return 1;
+ else
+ return errno == ENOENT ? 0 : -1;
+ }
+ fs_file_timing_start(file, FS_OP_EXISTS);
+ T_BEGIN {
+ ret = file->fs->v.exists(file);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN)) {
+ file->fs->stats.exists_count++;
+ fs_file_timing_end(file, FS_OP_EXISTS);
+ }
+ return ret;
+}
+
+int fs_stat(struct fs_file *file, struct stat *st_r)
+{
+ int ret;
+
+ if (file->fs->v.stat == NULL) {
+ fs_set_error(file->event, ENOTSUP, "fs_stat() not supported");
+ return -1;
+ }
+
+ if (!file->read_or_prefetch_counted &&
+ !file->lookup_metadata_counted && !file->stat_counted) {
+ file->stat_counted = TRUE;
+ file->fs->stats.stat_count++;
+ fs_file_timing_start(file, FS_OP_STAT);
+ }
+ T_BEGIN {
+ ret = file->fs->v.stat(file, st_r);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN))
+ fs_file_timing_end(file, FS_OP_STAT);
+ return ret;
+}
+
+int fs_get_nlinks(struct fs_file *file, nlink_t *nlinks_r)
+{
+ int ret;
+
+ if (file->fs->v.get_nlinks == NULL) {
+ struct stat st;
+
+ if (fs_stat(file, &st) < 0)
+ return -1;
+ *nlinks_r = st.st_nlink;
+ return 0;
+ }
+
+ if (!file->read_or_prefetch_counted &&
+ !file->lookup_metadata_counted && !file->stat_counted) {
+ file->stat_counted = TRUE;
+ file->fs->stats.stat_count++;
+ fs_file_timing_start(file, FS_OP_STAT);
+ }
+ T_BEGIN {
+ ret = file->fs->v.get_nlinks(file, nlinks_r);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN))
+ fs_file_timing_end(file, FS_OP_STAT);
+ return ret;
+}
+
+int fs_default_copy(struct fs_file *src, struct fs_file *dest)
+{
+ int tmp_errno;
+ /* we're going to be counting this as read+write, so don't update
+ copy_count */
+ dest->copy_counted = TRUE;
+
+ if (dest->copy_src != NULL) {
+ i_assert(src == NULL || src == dest->copy_src);
+ if (dest->copy_output == NULL) {
+ i_assert(dest->copy_input == NULL);
+ if (fs_write_stream_finish_async(dest) <= 0)
+ return -1;
+ dest->copy_src = NULL;
+ return 0;
+ }
+ } else {
+ dest->copy_src = src;
+ dest->copy_input = fs_read_stream(src, IO_BLOCK_SIZE);
+ dest->copy_output = fs_write_stream(dest);
+ }
+ switch (o_stream_send_istream(dest->copy_output, dest->copy_input)) {
+ case OSTREAM_SEND_ISTREAM_RESULT_FINISHED:
+ break;
+ case OSTREAM_SEND_ISTREAM_RESULT_WAIT_INPUT:
+ case OSTREAM_SEND_ISTREAM_RESULT_WAIT_OUTPUT:
+ fs_file_set_error_async(dest);
+ return -1;
+ case OSTREAM_SEND_ISTREAM_RESULT_ERROR_INPUT:
+ fs_write_stream_abort_error(dest, &dest->copy_output,
+ "read(%s) failed: %s",
+ i_stream_get_name(dest->copy_input),
+ i_stream_get_error(dest->copy_input));
+ errno = dest->copy_input->stream_errno;
+ i_stream_unref(&dest->copy_input);
+ return -1;
+ case OSTREAM_SEND_ISTREAM_RESULT_ERROR_OUTPUT:
+ /* errno might not survive abort error */
+ tmp_errno = dest->copy_output->stream_errno;
+ fs_write_stream_abort_error(dest, &dest->copy_output,
+ "write(%s) failed: %s",
+ o_stream_get_name(dest->copy_output),
+ o_stream_get_error(dest->copy_output));
+ errno = tmp_errno;
+ i_stream_unref(&dest->copy_input);
+ return -1;
+ }
+ i_stream_unref(&dest->copy_input);
+ if (fs_write_stream_finish(dest, &dest->copy_output) <= 0)
+ return -1;
+ dest->copy_src = NULL;
+ return 0;
+}
+
+int fs_copy(struct fs_file *src, struct fs_file *dest)
+{
+ int ret;
+
+ i_assert(src->fs == dest->fs);
+
+ if (src->fs->v.copy == NULL) {
+ fs_set_error(src->event, ENOTSUP, "fs_copy() not supported");
+ return -1;
+ }
+
+ fs_file_timing_start(dest, FS_OP_COPY);
+ T_BEGIN {
+ ret = src->fs->v.copy(src, dest);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN)) {
+ fs_file_timing_end(dest, FS_OP_COPY);
+ if (dest->copy_counted)
+ dest->copy_counted = FALSE;
+ else
+ dest->fs->stats.copy_count++;
+ dest->metadata_changed = FALSE;
+ }
+ return ret;
+}
+
+int fs_copy_finish_async(struct fs_file *dest)
+{
+ int ret;
+
+ T_BEGIN {
+ ret = dest->fs->v.copy(NULL, dest);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN)) {
+ fs_file_timing_end(dest, FS_OP_COPY);
+ if (dest->copy_counted)
+ dest->copy_counted = FALSE;
+ else
+ dest->fs->stats.copy_count++;
+ dest->metadata_changed = FALSE;
+ }
+ return ret;
+}
+
+int fs_rename(struct fs_file *src, struct fs_file *dest)
+{
+ int ret;
+
+ i_assert(src->fs == dest->fs);
+
+ fs_file_timing_start(dest, FS_OP_RENAME);
+ T_BEGIN {
+ ret = src->fs->v.rename(src, dest);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN)) {
+ dest->fs->stats.rename_count++;
+ fs_file_timing_end(dest, FS_OP_RENAME);
+ }
+ return ret;
+}
+
+int fs_delete(struct fs_file *file)
+{
+ int ret;
+
+ i_assert(!file->writing_stream);
+
+ fs_file_timing_start(file, FS_OP_DELETE);
+ T_BEGIN {
+ ret = file->fs->v.delete_file(file);
+ } T_END;
+ if (!(ret < 0 && errno == EAGAIN)) {
+ file->fs->stats.delete_count++;
+ fs_file_timing_end(file, FS_OP_DELETE);
+ }
+ return ret;
+}
+
+struct fs_iter *
+fs_iter_init(struct fs *fs, const char *path, enum fs_iter_flags flags)
+{
+ return fs_iter_init_with_event(fs, fs->event, path, flags);
+}
+
+struct fs_iter *
+fs_iter_init_with_event(struct fs *fs, struct event *event,
+ const char *path, enum fs_iter_flags flags)
+{
+ struct fs_iter *iter;
+ struct timeval now = ioloop_timeval;
+
+ i_assert((flags & FS_ITER_FLAG_OBJECTIDS) == 0 ||
+ (fs_get_properties(fs) & FS_PROPERTY_OBJECTIDS) != 0);
+
+ fs->stats.iter_count++;
+ if (fs->set.enable_timing)
+ i_gettimeofday(&now);
+ if (fs->v.iter_init == NULL) {
+ iter = i_new(struct fs_iter, 1);
+ iter->fs = fs;
+ } else T_BEGIN {
+ iter = fs->v.iter_alloc();
+ iter->fs = fs;
+ iter->flags = flags;
+ iter->path = i_strdup(path);
+ iter->event = fs_create_event(fs, event);
+ event_set_ptr(iter->event, FS_EVENT_FIELD_FS, fs);
+ event_set_ptr(iter->event, FS_EVENT_FIELD_ITER, iter);
+ fs->v.iter_init(iter, path, flags);
+ } T_END;
+ iter->start_time = now;
+ DLLIST_PREPEND(&fs->iters, iter);
+ return iter;
+}
+
+int fs_iter_deinit(struct fs_iter **_iter, const char **error_r)
+{
+ struct fs_iter *iter = *_iter;
+ struct fs *fs;
+ struct event *event;
+ int ret;
+
+ if (iter == NULL)
+ return 0;
+
+ fs = iter->fs;
+ event = iter->event;
+
+ *_iter = NULL;
+ DLLIST_REMOVE(&fs->iters, iter);
+
+ if (fs->v.iter_deinit == NULL) {
+ fs_set_error(event, ENOTSUP, "FS iteration not supported");
+ ret = -1;
+ } else T_BEGIN {
+ ret = iter->fs->v.iter_deinit(iter);
+ } T_END;
+ if (ret < 0)
+ *error_r = t_strdup(iter->last_error);
+ i_free(iter->last_error);
+ i_free(iter->path);
+ i_free(iter);
+ event_unref(&event);
+ return ret;
+}
+
+const char *fs_iter_next(struct fs_iter *iter)
+{
+ const char *ret;
+
+ if (iter->fs->v.iter_next == NULL)
+ return NULL;
+ T_BEGIN {
+ ret = iter->fs->v.iter_next(iter);
+ } T_END;
+ if (iter->start_time.tv_sec != 0 &&
+ (ret != NULL || !fs_iter_have_more(iter))) {
+ /* first result returned - count this as the finish time, since
+ we don't want to count the time caller spends on this
+ iteration. */
+ fs_timing_end(&iter->fs->stats.timings[FS_OP_ITER], &iter->start_time);
+ /* don't count this again */
+ iter->start_time.tv_sec = 0;
+ }
+ return ret;
+}
+
+#undef fs_iter_set_async_callback
+void fs_iter_set_async_callback(struct fs_iter *iter,
+ fs_file_async_callback_t *callback,
+ void *context)
+{
+ iter->async_callback = callback;
+ iter->async_context = context;
+}
+
+bool fs_iter_have_more(struct fs_iter *iter)
+{
+ return iter->async_have_more;
+}
+
+const struct fs_stats *fs_get_stats(struct fs *fs)
+{
+ return &fs->stats;
+}
+
+void fs_set_error(struct event *event, int err, const char *fmt, ...)
+{
+ va_list args;
+
+ i_assert(err != 0);
+
+ errno = err;
+ va_start(args, fmt);
+ fs_set_verror(event, fmt, args);
+ va_end(args);
+}
+
+void fs_set_error_errno(struct event *event, const char *fmt, ...)
+{
+ va_list args;
+
+ i_assert(errno != 0);
+
+ va_start(args, fmt);
+ fs_set_verror(event, fmt, args);
+ va_end(args);
+}
+
+void fs_file_set_error_async(struct fs_file *file)
+{
+ fs_set_error(file->event, EAGAIN, "Asynchronous operation in progress");
+}
+
+static uint64_t
+fs_stats_count_ops(const struct fs_stats *stats, const enum fs_op ops[],
+ unsigned int ops_count)
+{
+ uint64_t ret = 0;
+
+ for (unsigned int i = 0; i < ops_count; i++) {
+ if (stats->timings[ops[i]] != NULL)
+ ret += stats_dist_get_sum(stats->timings[ops[i]]);
+ }
+ return ret;
+}
+
+uint64_t fs_stats_get_read_usecs(const struct fs_stats *stats)
+{
+ const enum fs_op read_ops[] = {
+ FS_OP_METADATA, FS_OP_PREFETCH, FS_OP_READ, FS_OP_EXISTS,
+ FS_OP_STAT, FS_OP_ITER
+ };
+ return fs_stats_count_ops(stats, read_ops, N_ELEMENTS(read_ops));
+}
+
+uint64_t fs_stats_get_write_usecs(const struct fs_stats *stats)
+{
+ const enum fs_op write_ops[] = {
+ FS_OP_WRITE, FS_OP_COPY, FS_OP_DELETE
+ };
+ return fs_stats_count_ops(stats, write_ops, N_ELEMENTS(write_ops));
+}
+
+struct fs_file *
+fs_file_init_parent(struct fs_file *parent, const char *path,
+ enum fs_open_mode mode, enum fs_open_flags flags)
+{
+ return fs_file_init_with_event(parent->fs->parent, parent->event,
+ path, (int)mode | (int)flags);
+}
+
+struct fs_iter *
+fs_iter_init_parent(struct fs_iter *parent,
+ const char *path, enum fs_iter_flags flags)
+{
+ return fs_iter_init_with_event(parent->fs->parent, parent->event,
+ path, flags);
+}