summaryrefslogtreecommitdiffstats
path: root/fluent-bit/lib/chunkio/src/cio_file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fluent-bit/lib/chunkio/src/cio_file.c')
-rw-r--r--fluent-bit/lib/chunkio/src/cio_file.c1344
1 files changed, 1344 insertions, 0 deletions
diff --git a/fluent-bit/lib/chunkio/src/cio_file.c b/fluent-bit/lib/chunkio/src/cio_file.c
new file mode 100644
index 00000000..019baa89
--- /dev/null
+++ b/fluent-bit/lib/chunkio/src/cio_file.c
@@ -0,0 +1,1344 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* Chunk I/O
+ * =========
+ * Copyright 2018-2019 Eduardo Silva <eduardo@monkey.io>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <limits.h>
+
+#include <chunkio/chunkio.h>
+#include <chunkio/chunkio_compat.h>
+#include <chunkio/cio_crc32.h>
+#include <chunkio/cio_chunk.h>
+#include <chunkio/cio_file.h>
+#include <chunkio/cio_file_native.h>
+#include <chunkio/cio_file_st.h>
+#include <chunkio/cio_log.h>
+#include <chunkio/cio_stream.h>
+#include <chunkio/cio_error.h>
+#include <chunkio/cio_utils.h>
+
+size_t scio_file_page_size = 0;
+
+char cio_file_init_bytes[] = {
+ /* file type (2 bytes) */
+ CIO_FILE_ID_00, CIO_FILE_ID_01,
+
+ /* crc32 (4 bytes) in network byte order */
+ 0xff, 0x12, 0xd9, 0x41,
+
+ /* padding bytes (we have 16 extra bytes) */
+ 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00,
+
+ /* metadata length (2 bytes) */
+ 0x00, 0x00
+};
+
+#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))
+
+
+/* Calculate content checksum in a variable */
+void cio_file_calculate_checksum(struct cio_file *cf, crc_t *out)
+{
+ crc_t val;
+ size_t len;
+ ssize_t content_length;
+ unsigned char *in_data;
+
+ if (cf->fs_size == 0) {
+ cio_file_update_size(cf);
+ }
+
+ /* Metadata length header + metadata length + content length */
+ len = 2;
+ len += cio_file_st_get_meta_len(cf->map);
+
+ content_length = cio_file_st_get_content_len(cf->map,
+ cf->fs_size,
+ cf->page_size);
+
+ if (content_length > 0) {
+ len += content_length;
+ }
+
+ in_data = (unsigned char *) cf->map + CIO_FILE_CONTENT_OFFSET;
+ val = cio_crc32_update(cf->crc_cur, in_data, len);
+ *out = val;
+}
+
+/* Update crc32 checksum into the memory map */
+static void update_checksum(struct cio_file *cf,
+ unsigned char *data, size_t len)
+{
+ crc_t crc;
+ crc_t tmp;
+
+ if (cf->crc_reset) {
+ cf->crc_cur = cio_crc32_init();
+ cio_file_calculate_checksum(cf, &tmp);
+ cf->crc_cur = tmp;
+ cf->crc_reset = CIO_FALSE;
+ }
+
+ crc = cio_crc32_update(cf->crc_cur, data, len);
+ memcpy(cf->map + 2, &crc, sizeof(crc));
+ cf->crc_cur = crc;
+}
+
+/* Finalize CRC32 context and update the memory map */
+static void finalize_checksum(struct cio_file *cf)
+{
+ crc_t crc;
+
+ crc = cio_crc32_finalize(cf->crc_cur);
+ crc = htonl(crc);
+
+ memcpy(cf->map + 2, &crc, sizeof(crc));
+}
+
+/*
+ * adjust_layout: if metadata has changed, we need to adjust the content
+ * data and reference pointers.
+ */
+static int adjust_layout(struct cio_chunk *ch,
+ struct cio_file *cf, size_t meta_size)
+{
+ cio_file_st_set_meta_len(cf->map, (uint16_t) meta_size);
+
+ /* Update checksum */
+ if (ch->ctx->options.flags & CIO_CHECKSUM) {
+ /* reset current crc since we are calculating from zero */
+ cf->crc_cur = cio_crc32_init();
+ cio_file_calculate_checksum(cf, &cf->crc_cur);
+ }
+
+ /* Sync changes to disk */
+ cf->synced = CIO_FALSE;
+
+ return 0;
+}
+
+/* Initialize Chunk header & structure */
+static void write_init_header(struct cio_chunk *ch, struct cio_file *cf)
+{
+ memcpy(cf->map, cio_file_init_bytes, sizeof(cio_file_init_bytes));
+
+ /* If no checksum is enabled, reset the initial crc32 bytes */
+ if (!(ch->ctx->options.flags & CIO_CHECKSUM)) {
+ cf->map[2] = 0;
+ cf->map[3] = 0;
+ cf->map[4] = 0;
+ cf->map[5] = 0;
+ }
+
+ cio_file_st_set_content_len(cf->map, 0);
+}
+
+/* Return the available size in the file map to write data */
+static size_t get_available_size(struct cio_file *cf, int *meta_len)
+{
+ size_t av;
+ int metadata_len;
+
+ /* Get metadata length */
+ metadata_len = cio_file_st_get_meta_len(cf->map);
+
+ av = cf->alloc_size;
+ av -= CIO_FILE_HEADER_MIN;
+ av -= metadata_len;
+ av -= cf->data_size;
+
+ *meta_len = metadata_len;
+
+ return av;
+}
+
+/*
+ * For the recently opened or created file, check the structure format
+ * and validate relevant fields.
+ */
+static int cio_file_format_check(struct cio_chunk *ch,
+ struct cio_file *cf, int flags)
+{
+ size_t metadata_length;
+ ssize_t content_length;
+ ssize_t logical_length;
+ unsigned char *p;
+ crc_t crc_check;
+ crc_t crc;
+
+ (void) flags;
+
+ p = (unsigned char *) cf->map;
+
+ /* If the file is empty, put the structure on it */
+ if (cf->fs_size == 0) {
+ /* check we have write permissions */
+ if ((cf->flags & CIO_OPEN) == 0) {
+ cio_log_warn(ch->ctx,
+ "[cio file] cannot initialize chunk (read-only)");
+ cio_error_set(ch, CIO_ERR_PERMISSION);
+
+ return -1;
+ }
+
+ /* at least we need 24 bytes as allocated space */
+ if (cf->alloc_size < CIO_FILE_HEADER_MIN) {
+ cio_log_warn(ch->ctx, "[cio file] cannot initialize chunk");
+ cio_error_set(ch, CIO_ERR_BAD_LAYOUT);
+
+ return -1;
+ }
+
+ /* Initialize init bytes */
+ write_init_header(ch, cf);
+
+ /* Write checksum in context (note: crc32 not finalized) */
+ if (ch->ctx->options.flags & CIO_CHECKSUM) {
+ cio_file_calculate_checksum(cf, &cf->crc_cur);
+ }
+ }
+ else {
+ /* Check first two bytes */
+ if (p[0] != CIO_FILE_ID_00 || p[1] != CIO_FILE_ID_01) {
+ cio_log_debug(ch->ctx, "[cio file] invalid header at %s",
+ ch->name);
+ cio_error_set(ch, CIO_ERR_BAD_LAYOUT);
+
+ return -1;
+ }
+
+ /* Expected / logical file size verification */
+ content_length = cio_file_st_get_content_len(cf->map,
+ cf->fs_size,
+ cf->page_size);
+
+ if (content_length == -1) {
+ cio_log_debug(ch->ctx, "[cio file] truncated header (%zu / %zu) %s",
+ cf->fs_size, CIO_FILE_HEADER_MIN, ch->name);
+ cio_error_set(ch, CIO_ERR_BAD_FILE_SIZE);
+
+ return -1;
+ }
+
+ metadata_length = cio_file_st_get_meta_len(cf->map);
+
+ logical_length = CIO_FILE_HEADER_MIN +
+ metadata_length +
+ content_length;
+
+ if (logical_length > cf->fs_size) {
+ cio_log_debug(ch->ctx, "[cio file] truncated file (%zd / %zd) %s",
+ cf->fs_size, logical_length, ch->name);
+ cio_error_set(ch, CIO_ERR_BAD_FILE_SIZE);
+
+ return -1;
+ }
+
+ /* Checksum */
+ if (ch->ctx->options.flags & CIO_CHECKSUM) {
+ /* Initialize CRC variable */
+ cf->crc_cur = cio_crc32_init();
+
+ /* Get checksum stored in the mmap */
+ p = (unsigned char *) cio_file_st_get_hash(cf->map);
+
+ /* Calculate content checksum */
+ cio_file_calculate_checksum(cf, &crc);
+
+ /* Compare */
+ crc_check = cio_crc32_finalize(crc);
+ crc_check = htonl(crc_check);
+
+ if (memcmp(p, &crc_check, sizeof(crc_check)) != 0) {
+ cio_log_info(ch->ctx, "[cio file] invalid crc32 at %s/%s",
+ ch->name, cf->path);
+ cio_error_set(ch, CIO_ERR_BAD_CHECKSUM);
+
+ return -1;
+ }
+
+ cf->crc_cur = crc;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Unmap the memory for the opened file in question. It make sure
+ * to sync changes to disk first.
+ */
+static int munmap_file(struct cio_ctx *ctx, struct cio_chunk *ch)
+{
+ int ret;
+ struct cio_file *cf;
+
+ cf = (struct cio_file *) ch->backend;
+
+ if (!cf) {
+ return -1;
+ }
+
+ /* File not mapped */
+ if (cf->map == NULL) {
+ return -1;
+ }
+
+ /* Sync pending changes to disk */
+ if (cf->synced == CIO_FALSE) {
+ ret = cio_file_sync(ch);
+ if (ret == -1) {
+ cio_log_error(ch->ctx,
+ "[cio file] error syncing file at "
+ "%s:%s", ch->st->name, ch->name);
+ }
+ }
+
+ /* Unmap file */
+ cio_file_native_unmap(cf);
+
+ cf->data_size = 0;
+ cf->alloc_size = 0;
+
+ /* Adjust counters */
+ cio_chunk_counter_total_up_sub(ctx);
+
+ return 0;
+}
+
+/*
+ * This function creates the memory map for the open file descriptor plus
+ * setup the chunk structure reference.
+ */
+static int mmap_file(struct cio_ctx *ctx, struct cio_chunk *ch, size_t size)
+{
+ ssize_t content_size;
+ size_t fs_size;
+ int ret;
+ struct cio_file *cf;
+
+ cf = (struct cio_file *) ch->backend;
+
+ if (cf->map != NULL) {
+ return CIO_OK;
+ }
+
+ /*
+ * 'size' value represents the value of a previous fstat(2) set by a previous
+ * caller. If the value is greater than zero, just use it, otherwise do a new
+ * fstat(2) of the file descriptor.
+ */
+
+ fs_size = 0;
+
+ if (size > 0) {
+ fs_size = size;
+ }
+ else {
+ /* Get file size from the file system */
+ ret = cio_file_native_get_size(cf, &fs_size);
+
+ if (ret != CIO_OK) {
+ cio_file_report_os_error();
+
+ return CIO_ERROR;
+ }
+ }
+
+ /* If the file is not empty, use file size for the memory map */
+ if (fs_size > 0) {
+ size = fs_size;
+ cf->synced = CIO_TRUE;
+ }
+ else if (fs_size == 0) {
+ /* We can only prepare a file if it has been opened in RW mode */
+ if ((cf->flags & CIO_OPEN_RW) == 0) {
+ cio_error_set(ch, CIO_ERR_PERMISSION);
+
+ return CIO_CORRUPTED;
+ }
+
+ cf->synced = CIO_FALSE;
+
+ /* Adjust size to make room for headers */
+ if (size < CIO_FILE_HEADER_MIN) {
+ size += CIO_FILE_HEADER_MIN;
+ }
+
+ /* For empty files, make room in the file system */
+ size = ROUND_UP(size, ctx->page_size);
+ ret = cio_file_resize(cf, size);
+
+ if (ret != CIO_OK) {
+ cio_log_error(ctx, "cannot adjust chunk size '%s' to %lu bytes",
+ cf->path, size);
+
+ return CIO_ERROR;
+ }
+
+ cio_log_debug(ctx, "%s:%s adjusting size OK", ch->st->name, ch->name);
+ }
+
+ cf->alloc_size = size;
+
+ /* Map the file */
+ ret = cio_file_native_map(cf, cf->alloc_size);
+
+ if (ret != CIO_OK) {
+ cio_log_error(ctx, "cannot mmap/read chunk '%s'", cf->path);
+
+ return CIO_ERROR;
+ }
+
+ /* check content data size */
+ if (fs_size > 0) {
+ content_size = cio_file_st_get_content_len(cf->map,
+ fs_size,
+ cf->page_size);
+
+ if (content_size == -1) {
+ cio_error_set(ch, CIO_ERR_BAD_FILE_SIZE);
+
+ cio_log_error(ctx, "invalid content size %s", cf->path);
+
+ cio_file_native_unmap(cf);
+
+ cf->data_size = 0;
+ cf->alloc_size = 0;
+
+ return CIO_CORRUPTED;
+ }
+
+
+ cf->data_size = content_size;
+ cf->fs_size = fs_size;
+ }
+ else {
+ cf->data_size = 0;
+ cf->fs_size = 0;
+ }
+
+ ret = cio_file_format_check(ch, cf, cf->flags);
+
+ if (ret != 0) {
+ cio_log_error(ctx, "format check failed: %s/%s",
+ ch->st->name, ch->name);
+
+ cio_file_native_unmap(cf);
+
+ cf->data_size = 0;
+
+ return CIO_CORRUPTED;
+ }
+
+ cf->st_content = cio_file_st_get_content(cf->map);
+ cio_log_debug(ctx, "%s:%s mapped OK", ch->st->name, ch->name);
+
+ /* The mmap succeeded, adjust the counters */
+ cio_chunk_counter_total_up_add(ctx);
+
+ return CIO_OK;
+}
+
+int cio_file_lookup_user(char *user, void **result)
+{
+ return cio_file_native_lookup_user(user, result);
+}
+
+int cio_file_lookup_group(char *group, void **result)
+{
+ return cio_file_native_lookup_group(group, result);
+}
+
+int cio_file_read_prepare(struct cio_ctx *ctx, struct cio_chunk *ch)
+{
+ return mmap_file(ctx, ch, 0);
+}
+
+int cio_file_content_copy(struct cio_chunk *ch,
+ void **out_buf, size_t *out_size)
+{
+ int ret;
+ int set_down = CIO_FALSE;
+ char *buf;
+ char *data = NULL;
+ size_t size;
+ struct cio_file *cf = ch->backend;
+
+ /* If the file content is already up, just do a copy of the memory map */
+ if (cio_chunk_is_up(ch) == CIO_FALSE) {
+ ret = cio_chunk_up_force(ch);
+ if (ret != CIO_OK ){
+ return CIO_ERROR;
+ }
+ set_down = CIO_TRUE;
+ }
+
+ size = cf->data_size;
+ data = cio_file_st_get_content(cf->map);
+
+ if (!data) {
+ if (set_down == CIO_TRUE) {
+ cio_chunk_down(ch);
+ }
+ return CIO_ERROR;
+ }
+
+ buf = malloc(size + 1);
+ if (!buf) {
+ cio_errno();
+ if (set_down == CIO_TRUE) {
+ cio_chunk_down(ch);
+ }
+ return CIO_ERROR;
+ }
+ memcpy(buf, data, size);
+ buf[size] = '\0';
+
+ *out_buf = buf;
+ *out_size = size;
+
+ if (set_down == CIO_TRUE) {
+ cio_chunk_down(ch);
+ }
+
+ return CIO_OK;
+}
+
+/*
+ * If the maximum number of 'up' chunks is reached, put this chunk
+ * down (only at open time).
+ */
+static inline int open_and_up(struct cio_ctx *ctx)
+{
+ if (ctx->total_chunks_up >= ctx->max_chunks_up) {
+ return CIO_FALSE;
+ }
+
+ return CIO_TRUE;
+}
+
+/*
+ * Fetch the file size regardless of if we opened this file or not.
+ */
+size_t cio_file_real_size(struct cio_file *cf)
+{
+ size_t file_size;
+ int ret;
+
+ ret = cio_file_native_get_size(cf, &file_size);
+
+ if (ret != CIO_OK) {
+ return 0;
+ }
+
+ return file_size;
+}
+
+static int format_acl_error_message(struct cio_ctx *ctx,
+ struct cio_file *cf,
+ char *output_buffer,
+ size_t output_buffer_size)
+{
+ char *connector;
+ int result;
+ char *group;
+ char *user;
+
+ user = ctx->options.user;
+ group = ctx->options.group;
+ connector = "with group";
+
+ if (user == NULL) {
+ user = "";
+ connector = "";
+ }
+
+ if (group == NULL) {
+ group = "";
+ connector = "";
+ }
+
+ result = snprintf(output_buffer, output_buffer_size - 1,
+ "cannot change ownership of %s to %s %s %s",
+ cf->path, user, connector, group);
+
+ if (result < 0) {
+ return CIO_ERROR;
+ }
+
+ return CIO_OK;
+}
+
+/*
+ * Open or create a data file: the following behavior is expected depending
+ * of the passed flags:
+ *
+ * CIO_OPEN | CIO_OPEN_RW:
+ * - Open for read/write, if the file don't exist, it's created and the
+ * memory map size is assigned to the given value on 'size'.
+ *
+ * CIO_OPEN_RD:
+ * - If file exists, open it in read-only mode.
+ */
+struct cio_file *cio_file_open(struct cio_ctx *ctx,
+ struct cio_stream *st,
+ struct cio_chunk *ch,
+ int flags,
+ size_t size,
+ int *err)
+{
+ char error_message[256];
+ char *path;
+ int ret;
+ struct cio_file *cf;
+
+ (void) size;
+
+ ret = cio_file_native_filename_check(ch->name);
+ if (ret != CIO_OK) {
+ cio_log_error(ctx, "[cio file] invalid file name");
+
+ return NULL;
+ }
+
+ path = cio_file_native_compose_path(ctx->options.root_path, st->name, ch->name);
+ if (path == NULL) {
+ return NULL;
+ }
+
+ /* Create file context */
+ cf = calloc(1, sizeof(struct cio_file));
+ if (!cf) {
+ cio_errno();
+ free(path);
+
+ return NULL;
+ }
+
+ cf->fd = -1;
+ cf->flags = flags;
+ cf->page_size = cio_getpagesize();
+
+ if (ctx->realloc_size_hint > 0) {
+ cf->realloc_size = ctx->realloc_size_hint;
+ }
+ else {
+ cf->realloc_size = CIO_REALLOC_HINT_MIN;
+ }
+
+ cf->st_content = NULL;
+ cf->crc_cur = cio_crc32_init();
+ cf->path = path;
+ cf->map = NULL;
+ ch->backend = cf;
+
+#ifdef _WIN32
+ cf->backing_file = INVALID_HANDLE_VALUE;
+ cf->backing_mapping = INVALID_HANDLE_VALUE;
+#endif
+
+#if defined (CIO_HAVE_FALLOCATE)
+ cf->allocate_strategy = CIO_FILE_LINUX_FALLOCATE;
+#endif
+
+ /* Should we open and put this file up ? */
+ ret = open_and_up(ctx);
+
+ if (ret == CIO_FALSE) {
+ /* we reached our limit, leave the file 'down' */
+ cio_file_update_size(cf);
+
+ /*
+ * Due to he current resource limiting logic we could
+ * get to this point without a file existing so we just
+ * ignore the error.
+ */
+
+ return cf;
+ }
+
+ /* Open the file */
+ ret = cio_file_native_open(cf);
+
+ if (ret != CIO_OK) {
+ free(path);
+ free(cf);
+
+ *err = ret;
+
+ return NULL;
+ }
+
+ /* Update the file size field */
+ ret = cio_file_update_size(cf);
+
+ if (ret != CIO_OK) {
+ cio_file_native_close(cf);
+
+ free(path);
+ free(cf);
+
+ *err = ret;
+
+ return NULL;
+ }
+
+ /* Set the file ownership and permissions */
+ ret = cio_file_native_apply_acl_and_settings(ctx, cf);
+
+ if (ret != CIO_OK) {
+ *err = ret;
+
+ ret = format_acl_error_message(ctx, cf, error_message, sizeof(error_message));
+
+ if (ret != CIO_OK) {
+ cio_log_error(ctx, "error generating error message for acl failure");
+ }
+ else {
+ cio_log_error(ctx, error_message);
+ }
+
+ cio_file_native_close(cf);
+
+ free(path);
+ free(cf);
+
+ return NULL;
+ }
+
+ /* Map the file */
+ ret = mmap_file(ctx, ch, cf->fs_size);
+ if (ret == CIO_ERROR || ret == CIO_CORRUPTED || ret == CIO_RETRY) {
+ cio_file_native_close(cf);
+
+ free(path);
+ free(cf);
+
+ *err = ret;
+
+ return NULL;
+ }
+
+ *err = CIO_OK;
+
+ return cf;
+}
+
+/* This function is used to delete a chunk by name, its only purpose is to delete
+ * chunks that cannnot be loaded (otherwise we would set them down with the delete
+ * flag set to TRUE).
+ */
+int cio_file_delete(struct cio_ctx *ctx, struct cio_stream *st, const char *name)
+{
+ char *path;
+ int ret;
+
+ ret = cio_file_native_filename_check((char *) name);
+ if (ret != CIO_OK) {
+ cio_log_error(ctx, "[cio file] invalid file name");
+
+ return CIO_ERROR;
+ }
+
+ path = cio_file_native_compose_path(ctx->options.root_path, st->name, (char *) name);
+ if (path == NULL) {
+ return CIO_ERROR;
+ }
+
+ ret = cio_file_native_delete_by_path(path);
+
+ free(path);
+
+ return ret;
+}
+
+/*
+ * Put a file content back into memory, only IF it has been set 'down'
+ * before.
+ */
+static int _cio_file_up(struct cio_chunk *ch, int enforced)
+{
+ int ret;
+ struct cio_file *cf = (struct cio_file *) ch->backend;
+
+ if (cf->map) {
+ cio_log_error(ch->ctx, "[cio file] file is already mapped: %s/%s",
+ ch->st->name, ch->name);
+ return CIO_ERROR;
+ }
+
+ if (cf->fd > 0) {
+ cio_log_error(ch->ctx, "[cio file] file descriptor already exists: "
+ "[fd=%i] %s:%s", cf->fd, ch->st->name, ch->name);
+ return CIO_ERROR;
+ }
+
+ /*
+ * Enforced mechanism provides safety based on Chunk I/O storage
+ * pre-set limits.
+ */
+ if (enforced == CIO_TRUE) {
+ ret = open_and_up(ch->ctx);
+ if (ret == CIO_FALSE) {
+ return CIO_ERROR;
+ }
+ }
+
+ /* Open file */
+ ret = cio_file_native_open(cf);
+
+ if (ret != CIO_OK) {
+ cio_log_error(ch->ctx, "[cio file] cannot open chunk: %s/%s",
+ ch->st->name, ch->name);
+ return CIO_ERROR;
+ }
+
+ ret = cio_file_update_size(cf);
+ if (ret != CIO_OK) {
+ return CIO_ERROR;
+ }
+
+ /*
+ * Map content:
+ *
+ * return values = CIO_OK, CIO_ERROR, CIO_CORRUPTED or CIO_RETRY
+ */
+ ret = mmap_file(ch->ctx, ch, cf->fs_size);
+ if (ret == CIO_ERROR) {
+ cio_log_error(ch->ctx, "[cio file] cannot map chunk: %s/%s",
+ ch->st->name, ch->name);
+ }
+
+ /*
+ * 'ret' can still be CIO_CORRUPTED or CIO_RETRY on those cases we
+ * close the file descriptor
+ */
+ if (ret == CIO_CORRUPTED || ret == CIO_RETRY) {
+ /*
+ * we just remove resources: close the recently opened file
+ * descriptor, we never delete the Chunk at this stage since
+ * the caller must take that action.
+ */
+ cio_file_native_close(cf);
+ }
+
+ return ret;
+}
+
+/*
+ * Load a file using 'enforced' mode: do not load the file in memory
+ * if we already passed memory or max_chunks_up restrictions.
+ */
+int cio_file_up(struct cio_chunk *ch)
+{
+ return _cio_file_up(ch, CIO_TRUE);
+}
+
+/* Load a file in non-enforced mode. This means it will load the file
+ * in memory skipping restrictions set by configuration.
+ *
+ * The use case of this call is when the caller needs to write data
+ * to a file which is down due to restrictions. But then the caller
+ * must put the chunk 'down' again if that was it original status.
+ */
+int cio_file_up_force(struct cio_chunk *ch)
+{
+ return _cio_file_up(ch, CIO_FALSE);
+}
+
+int cio_file_update_size(struct cio_file *cf)
+{
+ int result;
+
+ result = cio_file_native_get_size(cf, &cf->fs_size);
+
+ if (result != CIO_OK) {
+ cf->fs_size = 0;
+ }
+
+ return result;
+}
+
+/* Release memory and file descriptor resources but keep context */
+int cio_file_down(struct cio_chunk *ch)
+{
+ int ret;
+ struct cio_file *cf;
+
+ cf = (struct cio_file *) ch->backend;
+
+ if (cf->map == NULL) {
+ cio_log_error(ch->ctx, "[cio file] file is not mapped: %s/%s",
+ ch->st->name, ch->name);
+ return -1;
+ }
+
+ /* unmap memory */
+ munmap_file(ch->ctx, ch);
+
+ /* Allocated map size is zero */
+ cf->alloc_size = 0;
+
+ /* Update the file size */
+ ret = cio_file_update_size(cf);
+
+ if (ret != CIO_OK) {
+ cio_errno();
+ }
+
+ /* Close file descriptor */
+ cio_file_native_close(cf);
+
+ return 0;
+}
+
+void cio_file_close(struct cio_chunk *ch, int delete)
+{
+ int ret;
+ struct cio_file *cf;
+
+ cf = (struct cio_file *) ch->backend;
+
+ if (cf == NULL) {
+ return;
+ }
+
+ /* Safe unmap of the file content */
+ munmap_file(ch->ctx, ch);
+
+ /* Close file descriptor */
+ cio_file_native_close(cf);
+
+ /* Should we delete the content from the file system ? */
+ if (delete == CIO_TRUE) {
+ ret = cio_file_native_delete(cf);
+
+ if (ret != CIO_OK) {
+ cio_log_error(ch->ctx,
+ "[cio file] error deleting file at close %s:%s",
+ ch->st->name, ch->name);
+ }
+ }
+
+ free(cf->path);
+ free(cf);
+}
+
+
+int cio_file_write(struct cio_chunk *ch, const void *buf, size_t count)
+{
+ int ret;
+ int meta_len;
+ int pre_content;
+ size_t av_size;
+ size_t old_size;
+ size_t new_size;
+ struct cio_file *cf;
+
+ if (count == 0) {
+ /* do nothing */
+ return 0;
+ }
+
+ if (!ch) {
+ return -1;
+ }
+
+ cf = (struct cio_file *) ch->backend;
+
+ if (cio_chunk_is_up(ch) == CIO_FALSE) {
+ cio_log_error(ch->ctx, "[cio file] file is not mmap()ed: %s:%s",
+ ch->st->name, ch->name);
+ return -1;
+ }
+
+ /* get available size */
+ av_size = get_available_size(cf, &meta_len);
+
+ /* validate there is enough space, otherwise resize */
+ if (av_size < count) {
+ /* Set the pre-content size (chunk header + metadata) */
+ pre_content = (CIO_FILE_HEADER_MIN + meta_len);
+
+ new_size = cf->alloc_size + cf->realloc_size;
+ while (new_size < (pre_content + cf->data_size + count)) {
+ new_size += cf->realloc_size;
+ }
+
+ old_size = cf->alloc_size;
+ new_size = ROUND_UP(new_size, ch->ctx->page_size);
+
+ ret = cio_file_resize(cf, new_size);
+
+ if (ret != CIO_OK) {
+ cio_log_error(ch->ctx,
+ "[cio_file] error setting new file size on write");
+ return -1;
+ }
+
+ cio_log_debug(ch->ctx,
+ "[cio file] alloc_size from %lu to %lu",
+ old_size, new_size);
+ }
+
+ /* If crc_reset was toggled we know that data_size was
+ * modified by cio_chunk_write_at which means we need
+ * to update the header before we recalculate the checksum
+ */
+ if (cf->crc_reset) {
+ cio_file_st_set_content_len(cf->map, cf->data_size);
+ }
+
+ if (ch->ctx->options.flags & CIO_CHECKSUM) {
+ update_checksum(cf, (unsigned char *) buf, count);
+ }
+
+ cf->st_content = cio_file_st_get_content(cf->map);
+ memcpy(cf->st_content + cf->data_size, buf, count);
+
+ cf->data_size += count;
+ cf->synced = CIO_FALSE;
+
+ cio_file_st_set_content_len(cf->map, cf->data_size);
+
+ return 0;
+}
+
+int cio_file_write_metadata(struct cio_chunk *ch, char *buf, size_t size)
+{
+ int ret;
+ char *meta;
+ char *cur_content_data;
+ char *new_content_data;
+ size_t new_size;
+ size_t content_av;
+ size_t meta_av;
+ struct cio_file *cf;
+
+ cf = ch->backend;
+
+ if (cio_file_is_up(ch, cf) == CIO_FALSE) {
+ return -1;
+ }
+
+ /* Get metadata pointer */
+ meta = cio_file_st_get_meta(cf->map);
+
+ /* Check if meta already have some space available to overwrite */
+ meta_av = cio_file_st_get_meta_len(cf->map);
+
+ /* If there is some space available, just overwrite */
+ if (meta_av >= size) {
+ /* copy new metadata */
+ memcpy(meta, buf, size);
+
+ /* there are some remaining bytes, adjust.. */
+ cur_content_data = cio_file_st_get_content(cf->map);
+ new_content_data = meta + size;
+ memmove(new_content_data, cur_content_data, cf->data_size);
+ adjust_layout(ch, cf, size);
+
+ return 0;
+ }
+
+ /*
+ * The optimal case is if there is no content data, the non-optimal case
+ * where we need to increase the memory map size, move the content area
+ * bytes to a different position and write the metadata.
+ *
+ * Calculate the available space in the content area.
+ */
+ content_av = cf->alloc_size - cf->data_size;
+
+ /* If there is no enough space, increase the file size and it memory map */
+ if (content_av < size) {
+ new_size = (size - meta_av) + cf->data_size + CIO_FILE_HEADER_MIN;
+
+ ret = cio_file_resize(cf, new_size);
+
+ if (ret != CIO_OK) {
+ cio_log_error(ch->ctx,
+ "[cio meta] error resizing mapped file");
+
+ return -1;
+ }
+ }
+
+ /* get meta reference again in case the map address has changed */
+ meta = cio_file_st_get_meta(cf->map);
+
+ /* set new position for the content data */
+ cur_content_data = cio_file_st_get_content(cf->map);
+ new_content_data = meta + size;
+ memmove(new_content_data, cur_content_data, size);
+
+ /* copy new metadata */
+ memcpy(meta, buf, size);
+ adjust_layout(ch, cf, size);
+
+ return 0;
+}
+
+int cio_file_sync(struct cio_chunk *ch)
+{
+ int ret;
+ int meta_len;
+ size_t desired_size;
+ size_t file_size;
+ size_t av_size;
+ struct cio_file *cf;
+
+ if (ch == NULL) {
+ return -1;
+ }
+
+ cf = (struct cio_file *) ch->backend;
+
+ if (cf == NULL) {
+ return -1;
+ }
+
+ if (cf->flags & CIO_OPEN_RD) {
+ return 0;
+ }
+
+ if (cf->synced == CIO_TRUE) {
+ return 0;
+ }
+
+ ret = cio_file_native_get_size(cf, &file_size);
+
+ if (ret != CIO_OK) {
+ cio_file_report_os_error();
+
+ return -1;
+ }
+
+ /* File trimming has been made opt-in because it causes
+ * performance degradation and excessive fragmentation
+ * in XFS.
+ */
+ if ((ch->ctx->options.flags & CIO_TRIM_FILES) != 0) {
+ /* If there are extra space, truncate the file size */
+ av_size = get_available_size(cf, &meta_len);
+
+ if (av_size > 0) {
+ desired_size = cf->alloc_size - av_size;
+ }
+ else if (cf->alloc_size > file_size) {
+ desired_size = cf->alloc_size;
+ }
+ else {
+ desired_size = file_size;
+ }
+
+ if (desired_size != file_size) {
+ /* When file trimming is enabled we still round the file size up
+ * to the memory page size because even though not explicitly
+ * stated there seems to be a performance degradation issue that
+ * correlates with sub-page mapping.
+ */
+ desired_size = ROUND_UP(desired_size, ch->ctx->page_size);
+
+ ret = cio_file_resize(cf, desired_size);
+
+ if (ret != CIO_OK) {
+ cio_log_error(ch->ctx,
+ "[cio file sync] error adjusting size at: "
+ " %s/%s", ch->st->name, ch->name);
+
+ return ret;
+ }
+ }
+ }
+
+ /* Finalize CRC32 checksum */
+ if (ch->ctx->options.flags & CIO_CHECKSUM) {
+ finalize_checksum(cf);
+ }
+
+ /* Commit changes to disk */
+ ret = cio_file_native_sync(cf, ch->ctx->options.flags);
+
+ if (ret != CIO_OK) {
+ return -1;
+ }
+
+ cf->synced = CIO_TRUE;
+
+ ret = cio_file_update_size(cf);
+
+ if (ret != CIO_OK) {
+ return -1;
+ }
+
+ cio_log_debug(ch->ctx, "[cio file] synced at: %s/%s",
+ ch->st->name, ch->name);
+
+ return 0;
+}
+
+int cio_file_resize(struct cio_file *cf, size_t new_size)
+{
+ int inner_result;
+ size_t mapped_size;
+ int mapped_flag;
+ int result;
+
+ mapped_flag = cio_file_native_is_mapped(cf);
+ mapped_size = cf->alloc_size;
+
+#ifdef _WIN32
+ if (mapped_flag) {
+ result = cio_file_native_unmap(cf);
+
+ if (result != CIO_OK) {
+ return result;
+ }
+ }
+#endif
+
+ result = cio_file_native_resize(cf, new_size);
+
+ if (result != CIO_OK) {
+ cio_file_native_report_os_error();
+
+#ifdef _WIN32
+ if (mapped_flag) {
+ inner_result = cio_file_native_map(cf, mapped_size);
+ }
+#endif
+
+ return result;
+ }
+
+ if (mapped_flag) {
+#ifdef _WIN32
+ result = cio_file_native_map(cf, new_size);
+#else
+ result = cio_file_native_remap(cf, new_size);
+#endif
+
+ if (result != CIO_OK) {
+ return result;
+ }
+ }
+
+ (void) mapped_size;
+ (void) inner_result;
+
+ return CIO_OK;
+}
+
+char *cio_file_hash(struct cio_file *cf)
+{
+ return (cf->map + 2);
+}
+
+void cio_file_hash_print(struct cio_file *cf)
+{
+ printf("crc cur=%lu\n", (long unsigned int)cf->crc_cur);
+ printf("%08lx\n", (long unsigned int ) cf->crc_cur);
+}
+
+/* Dump files from given stream */
+void cio_file_scan_dump(struct cio_ctx *ctx, struct cio_stream *st)
+{
+ int ret;
+ int meta_len;
+ int set_down = CIO_FALSE;
+ char *p;
+ crc_t crc;
+ crc_t crc_fs;
+ char tmp[PATH_MAX];
+ struct mk_list *head;
+ struct cio_chunk *ch;
+ struct cio_file *cf;
+
+ mk_list_foreach(head, &st->chunks) {
+ ch = mk_list_entry(head, struct cio_chunk, _head);
+ cf = ch->backend;
+
+ if (cio_file_is_up(ch, cf) == CIO_FALSE) {
+ ret = cio_file_up(ch);
+ if (ret == -1) {
+ continue;
+ }
+ set_down = CIO_TRUE;
+ }
+
+ snprintf(tmp, sizeof(tmp) -1, "%s/%s", st->name, ch->name);
+ meta_len = cio_file_st_get_meta_len(cf->map);
+
+ p = cio_file_st_get_hash(cf->map);
+
+ memcpy(&crc_fs, p, sizeof(crc_fs));
+ crc_fs = ntohl(crc_fs);
+
+ printf(" %-60s", tmp);
+
+ /*
+ * the crc32 specified in the file is stored in 'val' now, if
+ * checksum mode is enabled we have to verify it.
+ */
+ if (ctx->options.flags & CIO_CHECKSUM) {
+ cio_file_calculate_checksum(cf, &crc);
+
+ /*
+ * finalize the checksum and compare it value using the
+ * host byte order.
+ */
+ crc = cio_crc32_finalize(crc);
+ if (crc != crc_fs) {
+ printf("checksum error=%08x expected=%08x, ",
+ (uint32_t) crc_fs, (uint32_t) crc);
+ }
+ }
+ printf("meta_len=%d, data_size=%zu, crc=%08x\n",
+ meta_len, cf->data_size, (uint32_t) crc_fs);
+
+ if (set_down == CIO_TRUE) {
+ cio_file_down(ch);
+ }
+ }
+}
+
+/* Check if a file content is up in memory and a file descriptor is set */
+int cio_file_is_up(struct cio_chunk *ch, struct cio_file *cf)
+{
+ (void) ch;
+
+ if (cio_file_native_is_open(cf) &&
+ cio_file_native_is_mapped(cf)) {
+ return CIO_TRUE;
+ }
+
+ return CIO_FALSE;
+}