From aa2fe8ccbfcb117efa207d10229eeeac5d0f97c7 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Mon, 6 Feb 2023 17:11:30 +0100
Subject: Adding upstream version 1.38.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 database/engine/journalfile.c | 1445 +++++++++++++++++++++++++++++++++--------
 1 file changed, 1162 insertions(+), 283 deletions(-)

(limited to 'database/engine/journalfile.c')

diff --git a/database/engine/journalfile.c b/database/engine/journalfile.c
index 500dd7880..de2b909c0 100644
--- a/database/engine/journalfile.c
+++ b/database/engine/journalfile.c
@@ -1,132 +1,424 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 #include "rrdengine.h"
 
-static void flush_transaction_buffer_cb(uv_fs_t* req)
+
+// DBENGINE2: Helper
+
+static void update_metric_retention_and_granularity_by_uuid(
+        struct rrdengine_instance *ctx, uuid_t *uuid,
+        time_t first_time_s, time_t last_time_s,
+        time_t update_every_s, time_t now_s)
+{
+    if(unlikely(last_time_s > now_s)) {
+        error_limit_static_global_var(erl, 1, 0);
+        error_limit(&erl, "DBENGINE JV2: wrong last time on-disk (%ld - %ld, now %ld), "
+                          "fixing last time to now",
+                    first_time_s, last_time_s, now_s);
+        last_time_s = now_s;
+    }
+
+    if (unlikely(first_time_s > last_time_s)) {
+        error_limit_static_global_var(erl, 1, 0);
+        error_limit(&erl, "DBENGINE JV2: wrong first time on-disk (%ld - %ld, now %ld), "
+                          "fixing first time to last time",
+                    first_time_s, last_time_s, now_s);
+
+        first_time_s = last_time_s;
+    }
+
+    if (unlikely(first_time_s == 0 || last_time_s == 0)) {
+        error_limit_static_global_var(erl, 1, 0);
+        error_limit(&erl, "DBENGINE JV2: zero on-disk timestamps (%ld - %ld, now %ld), "
+                          "using them as-is",
+                    first_time_s, last_time_s, now_s);
+    }
+
+    bool added = false;
+    METRIC *metric = mrg_metric_get_and_acquire(main_mrg, uuid, (Word_t) ctx);
+    if (!metric) {
+        MRG_ENTRY entry = {
+                .section = (Word_t) ctx,
+                .first_time_s = first_time_s,
+                .last_time_s = last_time_s,
+                .latest_update_every_s = update_every_s
+        };
+        uuid_copy(entry.uuid, *uuid);
+        metric = mrg_metric_add_and_acquire(main_mrg, entry, &added);
+    }
+
+    if (likely(!added))
+        mrg_metric_expand_retention(main_mrg, metric, first_time_s, last_time_s, update_every_s);
+
+    mrg_metric_release(main_mrg, metric);
+}
+
+static void after_extent_write_journalfile_v1_io(uv_fs_t* req)
 {
-    struct generic_io_descriptor *io_descr = req->data;
-    struct rrdengine_worker_config* wc = req->loop->data;
-    struct rrdengine_instance *ctx = wc->ctx;
+    worker_is_busy(RRDENG_FLUSH_TRANSACTION_BUFFER_CB);
+
+    WAL *wal = req->data;
+    struct generic_io_descriptor *io_descr = &wal->io_descr;
+    struct rrdengine_instance *ctx = io_descr->ctx;
 
     debug(D_RRDENGINE, "%s: Journal block was written to disk.", __func__);
     if (req->result < 0) {
-        ++ctx->stats.io_errors;
-        rrd_stat_atomic_add(&global_io_errors, 1);
-        error("%s: uv_fs_write: %s", __func__, uv_strerror((int)req->result));
+        ctx_io_error(ctx);
+        error("DBENGINE: %s: uv_fs_write: %s", __func__, uv_strerror((int)req->result));
     } else {
         debug(D_RRDENGINE, "%s: Journal block was written to disk.", __func__);
     }
 
     uv_fs_req_cleanup(req);
-    posix_memfree(io_descr->buf);
-    freez(io_descr);
+    wal_release(wal);
+
+    __atomic_sub_fetch(&ctx->atomic.extents_currently_being_flushed, 1, __ATOMIC_RELAXED);
+
+    worker_is_idle();
 }
 
 /* Careful to always call this before creating a new journal file */
-void wal_flush_transaction_buffer(struct rrdengine_worker_config* wc)
+void journalfile_v1_extent_write(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile, WAL *wal, uv_loop_t *loop)
 {
-    struct rrdengine_instance *ctx = wc->ctx;
     int ret;
     struct generic_io_descriptor *io_descr;
-    unsigned pos, size;
-    struct rrdengine_journalfile *journalfile;
+    struct rrdengine_journalfile *journalfile = datafile->journalfile;
 
-    if (unlikely(NULL == ctx->commit_log.buf || 0 == ctx->commit_log.buf_pos)) {
-        return;
-    }
-    /* care with outstanding transactions when switching journal files */
-    journalfile = ctx->datafiles.last->journalfile;
-
-    io_descr = mallocz(sizeof(*io_descr));
-    pos = ctx->commit_log.buf_pos;
-    size = ctx->commit_log.buf_size;
-    if (pos < size) {
+    io_descr = &wal->io_descr;
+    io_descr->ctx = ctx;
+    if (wal->size < wal->buf_size) {
         /* simulate an empty transaction to skip the rest of the block */
-        *(uint8_t *) (ctx->commit_log.buf + pos) = STORE_PADDING;
+        *(uint8_t *) (wal->buf + wal->size) = STORE_PADDING;
     }
-    io_descr->buf = ctx->commit_log.buf;
-    io_descr->bytes = size;
-    io_descr->pos = journalfile->pos;
-    io_descr->req.data = io_descr;
+    io_descr->buf = wal->buf;
+    io_descr->bytes = wal->buf_size;
+
+    netdata_spinlock_lock(&journalfile->unsafe.spinlock);
+    io_descr->pos = journalfile->unsafe.pos;
+    journalfile->unsafe.pos += wal->buf_size;
+    netdata_spinlock_unlock(&journalfile->unsafe.spinlock);
+
+    io_descr->req.data = wal;
+    io_descr->data = journalfile;
     io_descr->completion = NULL;
 
-    io_descr->iov = uv_buf_init((void *)io_descr->buf, size);
-    ret = uv_fs_write(wc->loop, &io_descr->req, journalfile->file, &io_descr->iov, 1,
-                      journalfile->pos, flush_transaction_buffer_cb);
+    io_descr->iov = uv_buf_init((void *)io_descr->buf, wal->buf_size);
+    ret = uv_fs_write(loop, &io_descr->req, journalfile->file, &io_descr->iov, 1,
+                      (int64_t)io_descr->pos, after_extent_write_journalfile_v1_io);
     fatal_assert(-1 != ret);
-    journalfile->pos += RRDENG_BLOCK_SIZE;
-    ctx->disk_space += RRDENG_BLOCK_SIZE;
-    ctx->commit_log.buf = NULL;
-    ctx->stats.io_write_bytes += RRDENG_BLOCK_SIZE;
-    ++ctx->stats.io_write_requests;
+
+    ctx_current_disk_space_increase(ctx, wal->buf_size);
+    ctx_io_write_op_bytes(ctx, wal->buf_size);
 }
 
-void * wal_get_transaction_buffer(struct rrdengine_worker_config* wc, unsigned size)
+void journalfile_v2_generate_path(struct rrdengine_datafile *datafile, char *str, size_t maxlen)
 {
-    struct rrdengine_instance *ctx = wc->ctx;
-    int ret;
-    unsigned buf_pos = 0, buf_size;
-
-    fatal_assert(size);
-    if (ctx->commit_log.buf) {
-        unsigned remaining;
-
-        buf_pos = ctx->commit_log.buf_pos;
-        buf_size = ctx->commit_log.buf_size;
-        remaining = buf_size - buf_pos;
-        if (size > remaining) {
-            /* we need a new buffer */
-            wal_flush_transaction_buffer(wc);
+    (void) snprintfz(str, maxlen, "%s/" WALFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL WALFILE_EXTENSION_V2,
+                    datafile->ctx->config.dbfiles_path, datafile->tier, datafile->fileno);
+}
+
+void journalfile_v1_generate_path(struct rrdengine_datafile *datafile, char *str, size_t maxlen)
+{
+    (void) snprintfz(str, maxlen, "%s/" WALFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL WALFILE_EXTENSION,
+                    datafile->ctx->config.dbfiles_path, datafile->tier, datafile->fileno);
+}
+
+static struct journal_v2_header *journalfile_v2_mounted_data_get(struct rrdengine_journalfile *journalfile, size_t *data_size) {
+    struct journal_v2_header *j2_header = NULL;
+
+    netdata_spinlock_lock(&journalfile->mmap.spinlock);
+
+    if(!journalfile->mmap.data) {
+        journalfile->mmap.data = mmap(NULL, journalfile->mmap.size, PROT_READ, MAP_SHARED, journalfile->mmap.fd, 0);
+        if (journalfile->mmap.data == MAP_FAILED) {
+            internal_fatal(true, "DBENGINE: failed to re-mmap() journal file v2");
+            close(journalfile->mmap.fd);
+            journalfile->mmap.fd = -1;
+            journalfile->mmap.data = NULL;
+            journalfile->mmap.size = 0;
+
+            netdata_spinlock_lock(&journalfile->v2.spinlock);
+            journalfile->v2.flags &= ~(JOURNALFILE_FLAG_IS_AVAILABLE | JOURNALFILE_FLAG_IS_MOUNTED);
+            netdata_spinlock_unlock(&journalfile->v2.spinlock);
+
+            ctx_fs_error(journalfile->datafile->ctx);
+        }
+        else {
+            __atomic_add_fetch(&rrdeng_cache_efficiency_stats.journal_v2_mapped, 1, __ATOMIC_RELAXED);
+
+            madvise_dontfork(journalfile->mmap.data, journalfile->mmap.size);
+            madvise_dontdump(journalfile->mmap.data, journalfile->mmap.size);
+            madvise_random(journalfile->mmap.data, journalfile->mmap.size);
+            madvise_dontneed(journalfile->mmap.data, journalfile->mmap.size);
+
+            netdata_spinlock_lock(&journalfile->v2.spinlock);
+            journalfile->v2.flags |= JOURNALFILE_FLAG_IS_AVAILABLE | JOURNALFILE_FLAG_IS_MOUNTED;
+            netdata_spinlock_unlock(&journalfile->v2.spinlock);
+        }
+    }
+
+    if(journalfile->mmap.data) {
+        j2_header = journalfile->mmap.data;
+
+        if (data_size)
+            *data_size = journalfile->mmap.size;
+    }
+
+    netdata_spinlock_unlock(&journalfile->mmap.spinlock);
+
+    return j2_header;
+}
+
+static bool journalfile_v2_mounted_data_unmount(struct rrdengine_journalfile *journalfile, bool have_locks, bool wait) {
+    bool unmounted = false;
+
+    if(!have_locks) {
+        if(!wait) {
+            if (!netdata_spinlock_trylock(&journalfile->mmap.spinlock))
+                return false;
         }
+        else
+            netdata_spinlock_lock(&journalfile->mmap.spinlock);
+
+        if(!wait) {
+            if(!netdata_spinlock_trylock(&journalfile->v2.spinlock)) {
+                netdata_spinlock_unlock(&journalfile->mmap.spinlock);
+                return false;
+            }
+        }
+        else
+            netdata_spinlock_lock(&journalfile->v2.spinlock);
     }
-    if (NULL == ctx->commit_log.buf) {
-        buf_size = ALIGN_BYTES_CEILING(size);
-        ret = posix_memalign((void *)&ctx->commit_log.buf, RRDFILE_ALIGNMENT, buf_size);
-        if (unlikely(ret)) {
-            fatal("posix_memalign:%s", strerror(ret));
+
+    if(!journalfile->v2.refcount) {
+        if(journalfile->mmap.data) {
+            if (munmap(journalfile->mmap.data, journalfile->mmap.size)) {
+                char path[RRDENG_PATH_MAX];
+                journalfile_v2_generate_path(journalfile->datafile, path, sizeof(path));
+                error("DBENGINE: failed to unmap index file '%s'", path);
+                internal_fatal(true, "DBENGINE: failed to unmap file '%s'", path);
+                ctx_fs_error(journalfile->datafile->ctx);
+            }
+            else {
+                __atomic_add_fetch(&rrdeng_cache_efficiency_stats.journal_v2_unmapped, 1, __ATOMIC_RELAXED);
+                journalfile->mmap.data = NULL;
+                journalfile->v2.flags &= ~JOURNALFILE_FLAG_IS_MOUNTED;
+            }
         }
-        memset(ctx->commit_log.buf, 0, buf_size);
-        buf_pos = ctx->commit_log.buf_pos = 0;
-        ctx->commit_log.buf_size =  buf_size;
+
+        unmounted = true;
     }
-    ctx->commit_log.buf_pos += size;
 
-    return ctx->commit_log.buf + buf_pos;
+    if(!have_locks) {
+        netdata_spinlock_unlock(&journalfile->v2.spinlock);
+        netdata_spinlock_unlock(&journalfile->mmap.spinlock);
+    }
+
+    return unmounted;
 }
 
-void generate_journalfilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen)
-{
-    (void) snprintfz(str, maxlen, "%s/" WALFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL WALFILE_EXTENSION,
-                    datafile->ctx->dbfiles_path, datafile->tier, datafile->fileno);
+void journalfile_v2_data_unmount_cleanup(time_t now_s) {
+    // DO NOT WAIT ON ANY LOCK!!!
+
+    for(size_t tier = 0; tier < (size_t)storage_tiers ;tier++) {
+        struct rrdengine_instance *ctx = multidb_ctx[tier];
+        if(!ctx) continue;
+
+        struct rrdengine_datafile *datafile;
+        if(uv_rwlock_tryrdlock(&ctx->datafiles.rwlock) != 0)
+            continue;
+
+        for (datafile = ctx->datafiles.first; datafile; datafile = datafile->next) {
+            struct rrdengine_journalfile *journalfile = datafile->journalfile;
+
+            if(!netdata_spinlock_trylock(&journalfile->v2.spinlock))
+                continue;
+
+            bool unmount = false;
+            if (!journalfile->v2.refcount && (journalfile->v2.flags & JOURNALFILE_FLAG_IS_MOUNTED)) {
+                // this journal has no references and it is mounted
+
+                if (!journalfile->v2.not_needed_since_s)
+                    journalfile->v2.not_needed_since_s = now_s;
+
+                else if (now_s - journalfile->v2.not_needed_since_s >= 120)
+                    // 2 minutes have passed since last use
+                    unmount = true;
+            }
+            netdata_spinlock_unlock(&journalfile->v2.spinlock);
+
+            if (unmount)
+                journalfile_v2_mounted_data_unmount(journalfile, false, false);
+        }
+        uv_rwlock_rdunlock(&ctx->datafiles.rwlock);
+    }
+}
+
+struct journal_v2_header *journalfile_v2_data_acquire(struct rrdengine_journalfile *journalfile, size_t *data_size, time_t wanted_first_time_s, time_t wanted_last_time_s) {
+    netdata_spinlock_lock(&journalfile->v2.spinlock);
+
+    bool has_data = (journalfile->v2.flags & JOURNALFILE_FLAG_IS_AVAILABLE);
+    bool is_mounted = (journalfile->v2.flags & JOURNALFILE_FLAG_IS_MOUNTED);
+    bool do_we_need_it = false;
+
+    if(has_data) {
+        if (!wanted_first_time_s || !wanted_last_time_s ||
+            is_page_in_time_range(journalfile->v2.first_time_s, journalfile->v2.last_time_s,
+                                  wanted_first_time_s, wanted_last_time_s) == PAGE_IS_IN_RANGE) {
+
+            journalfile->v2.refcount++;
+
+            do_we_need_it = true;
+
+            if (!wanted_first_time_s && !wanted_last_time_s && !is_mounted)
+                journalfile->v2.flags |= JOURNALFILE_FLAG_MOUNTED_FOR_RETENTION;
+            else
+                journalfile->v2.flags &= ~JOURNALFILE_FLAG_MOUNTED_FOR_RETENTION;
+
+        }
+    }
+    netdata_spinlock_unlock(&journalfile->v2.spinlock);
+
+    if(do_we_need_it)
+        return journalfile_v2_mounted_data_get(journalfile, data_size);
+
+    return NULL;
+}
+
+void journalfile_v2_data_release(struct rrdengine_journalfile *journalfile) {
+    netdata_spinlock_lock(&journalfile->v2.spinlock);
+
+    internal_fatal(!journalfile->mmap.data, "trying to release a journalfile without data");
+    internal_fatal(journalfile->v2.refcount < 1, "trying to release a non-acquired journalfile");
+
+    bool unmount = false;
+
+    journalfile->v2.refcount--;
+
+    if(journalfile->v2.refcount == 0) {
+        journalfile->v2.not_needed_since_s = 0;
+
+        if(journalfile->v2.flags & JOURNALFILE_FLAG_MOUNTED_FOR_RETENTION)
+            unmount = true;
+    }
+    netdata_spinlock_unlock(&journalfile->v2.spinlock);
+
+    if(unmount)
+        journalfile_v2_mounted_data_unmount(journalfile, false, true);
+}
+
+bool journalfile_v2_data_available(struct rrdengine_journalfile *journalfile) {
+
+    netdata_spinlock_lock(&journalfile->v2.spinlock);
+    bool has_data = (journalfile->v2.flags & JOURNALFILE_FLAG_IS_AVAILABLE);
+    netdata_spinlock_unlock(&journalfile->v2.spinlock);
+
+    return has_data;
+}
+
+size_t journalfile_v2_data_size_get(struct rrdengine_journalfile *journalfile) {
+
+    netdata_spinlock_lock(&journalfile->mmap.spinlock);
+    size_t data_size = journalfile->mmap.size;
+    netdata_spinlock_unlock(&journalfile->mmap.spinlock);
+
+    return data_size;
+}
+
+void journalfile_v2_data_set(struct rrdengine_journalfile *journalfile, int fd, void *journal_data, uint32_t journal_data_size) {
+    netdata_spinlock_lock(&journalfile->mmap.spinlock);
+    netdata_spinlock_lock(&journalfile->v2.spinlock);
+
+    internal_fatal(journalfile->mmap.fd != -1, "DBENGINE JOURNALFILE: trying to re-set journal fd");
+    internal_fatal(journalfile->mmap.data, "DBENGINE JOURNALFILE: trying to re-set journal_data");
+    internal_fatal(journalfile->v2.refcount, "DBENGINE JOURNALFILE: trying to re-set journal_data of referenced journalfile");
+
+    journalfile->mmap.fd = fd;
+    journalfile->mmap.data = journal_data;
+    journalfile->mmap.size = journal_data_size;
+    journalfile->v2.not_needed_since_s = now_monotonic_sec();
+    journalfile->v2.flags |= JOURNALFILE_FLAG_IS_AVAILABLE | JOURNALFILE_FLAG_IS_MOUNTED;
+
+    struct journal_v2_header *j2_header = journalfile->mmap.data;
+    journalfile->v2.first_time_s = (time_t)(j2_header->start_time_ut / USEC_PER_SEC);
+    journalfile->v2.last_time_s = (time_t)(j2_header->end_time_ut / USEC_PER_SEC);
+
+    journalfile_v2_mounted_data_unmount(journalfile, true, true);
+
+    netdata_spinlock_unlock(&journalfile->v2.spinlock);
+    netdata_spinlock_unlock(&journalfile->mmap.spinlock);
+}
+
+static void journalfile_v2_data_unmap_permanently(struct rrdengine_journalfile *journalfile) {
+    bool has_references = false;
+
+    do {
+        if (has_references)
+            sleep_usec(10 * USEC_PER_MS);
+
+        netdata_spinlock_lock(&journalfile->mmap.spinlock);
+        netdata_spinlock_lock(&journalfile->v2.spinlock);
+
+        if(journalfile_v2_mounted_data_unmount(journalfile, true, true)) {
+            if(journalfile->mmap.fd != -1)
+                close(journalfile->mmap.fd);
+
+            journalfile->mmap.fd = -1;
+            journalfile->mmap.data = NULL;
+            journalfile->mmap.size = 0;
+            journalfile->v2.first_time_s = 0;
+            journalfile->v2.last_time_s = 0;
+            journalfile->v2.flags = 0;
+        }
+        else {
+            has_references = true;
+            internal_error(true, "DBENGINE JOURNALFILE: waiting for journalfile to be available to unmap...");
+        }
+
+        netdata_spinlock_unlock(&journalfile->v2.spinlock);
+        netdata_spinlock_unlock(&journalfile->mmap.spinlock);
+
+    } while(has_references);
 }
 
-void journalfile_init(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
+struct rrdengine_journalfile *journalfile_alloc_and_init(struct rrdengine_datafile *datafile)
 {
-    journalfile->file = (uv_file)0;
-    journalfile->pos = 0;
+    struct rrdengine_journalfile *journalfile = callocz(1, sizeof(struct rrdengine_journalfile));
     journalfile->datafile = datafile;
+    netdata_spinlock_init(&journalfile->mmap.spinlock);
+    netdata_spinlock_init(&journalfile->v2.spinlock);
+    netdata_spinlock_init(&journalfile->unsafe.spinlock);
+    journalfile->mmap.fd = -1;
+    datafile->journalfile = journalfile;
+    return journalfile;
 }
 
-int close_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
+static int close_uv_file(struct rrdengine_datafile *datafile, uv_file file)
 {
-    struct rrdengine_instance *ctx = datafile->ctx;
-    uv_fs_t req;
     int ret;
     char path[RRDENG_PATH_MAX];
 
-    generate_journalfilepath(datafile, path, sizeof(path));
-
-    ret = uv_fs_close(NULL, &req, journalfile->file, NULL);
+    uv_fs_t req;
+    ret = uv_fs_close(NULL, &req, file, NULL);
     if (ret < 0) {
-        error("uv_fs_close(%s): %s", path, uv_strerror(ret));
-        ++ctx->stats.fs_errors;
-        rrd_stat_atomic_add(&global_fs_errors, 1);
+        journalfile_v1_generate_path(datafile, path, sizeof(path));
+        error("DBENGINE: uv_fs_close(%s): %s", path, uv_strerror(ret));
+        ctx_fs_error(datafile->ctx);
     }
     uv_fs_req_cleanup(&req);
-
     return ret;
 }
 
-int unlink_journal_file(struct rrdengine_journalfile *journalfile)
+int journalfile_close(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
+{
+    if(journalfile_v2_data_available(journalfile)) {
+        journalfile_v2_data_unmap_permanently(journalfile);
+        return 0;
+    }
+
+    return close_uv_file(datafile, journalfile->file);
+}
+
+int journalfile_unlink(struct rrdengine_journalfile *journalfile)
 {
     struct rrdengine_datafile *datafile = journalfile->datafile;
     struct rrdengine_instance *ctx = datafile->ctx;
@@ -134,60 +426,65 @@ int unlink_journal_file(struct rrdengine_journalfile *journalfile)
     int ret;
     char path[RRDENG_PATH_MAX];
 
-    generate_journalfilepath(datafile, path, sizeof(path));
+    journalfile_v1_generate_path(datafile, path, sizeof(path));
 
     ret = uv_fs_unlink(NULL, &req, path, NULL);
     if (ret < 0) {
-        error("uv_fs_fsunlink(%s): %s", path, uv_strerror(ret));
-        ++ctx->stats.fs_errors;
-        rrd_stat_atomic_add(&global_fs_errors, 1);
+        error("DBENGINE: uv_fs_fsunlink(%s): %s", path, uv_strerror(ret));
+        ctx_fs_error(ctx);
     }
     uv_fs_req_cleanup(&req);
 
-    ++ctx->stats.journalfile_deletions;
+    __atomic_add_fetch(&ctx->stats.journalfile_deletions, 1, __ATOMIC_RELAXED);
 
     return ret;
 }
 
-int destroy_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
+int journalfile_destroy_unsafe(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
 {
     struct rrdengine_instance *ctx = datafile->ctx;
     uv_fs_t req;
     int ret;
     char path[RRDENG_PATH_MAX];
+    char path_v2[RRDENG_PATH_MAX];
 
-    generate_journalfilepath(datafile, path, sizeof(path));
+    journalfile_v1_generate_path(datafile, path, sizeof(path));
+    journalfile_v2_generate_path(datafile, path_v2, sizeof(path));
 
+    if (journalfile->file) {
     ret = uv_fs_ftruncate(NULL, &req, journalfile->file, 0, NULL);
     if (ret < 0) {
-        error("uv_fs_ftruncate(%s): %s", path, uv_strerror(ret));
-        ++ctx->stats.fs_errors;
-        rrd_stat_atomic_add(&global_fs_errors, 1);
+        error("DBENGINE: uv_fs_ftruncate(%s): %s", path, uv_strerror(ret));
+        ctx_fs_error(ctx);
     }
     uv_fs_req_cleanup(&req);
+        (void) close_uv_file(datafile, journalfile->file);
+    }
 
-    ret = uv_fs_close(NULL, &req, journalfile->file, NULL);
+    // This is the new journal v2 index file
+    ret = uv_fs_unlink(NULL, &req, path_v2, NULL);
     if (ret < 0) {
-        error("uv_fs_close(%s): %s", path, uv_strerror(ret));
-        ++ctx->stats.fs_errors;
-        rrd_stat_atomic_add(&global_fs_errors, 1);
+        error("DBENGINE: uv_fs_fsunlink(%s): %s", path, uv_strerror(ret));
+        ctx_fs_error(ctx);
     }
     uv_fs_req_cleanup(&req);
 
     ret = uv_fs_unlink(NULL, &req, path, NULL);
     if (ret < 0) {
-        error("uv_fs_fsunlink(%s): %s", path, uv_strerror(ret));
-        ++ctx->stats.fs_errors;
-        rrd_stat_atomic_add(&global_fs_errors, 1);
+        error("DBENGINE: uv_fs_fsunlink(%s): %s", path, uv_strerror(ret));
+        ctx_fs_error(ctx);
     }
     uv_fs_req_cleanup(&req);
 
-    ++ctx->stats.journalfile_deletions;
+    __atomic_add_fetch(&ctx->stats.journalfile_deletions, 2, __ATOMIC_RELAXED);
+
+    if(journalfile_v2_data_available(journalfile))
+        journalfile_v2_data_unmap_permanently(journalfile);
 
     return ret;
 }
 
-int create_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
+int journalfile_create(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
 {
     struct rrdengine_instance *ctx = datafile->ctx;
     uv_fs_t req;
@@ -197,19 +494,18 @@ int create_journal_file(struct rrdengine_journalfile *journalfile, struct rrdeng
     uv_buf_t iov;
     char path[RRDENG_PATH_MAX];
 
-    generate_journalfilepath(datafile, path, sizeof(path));
-    fd = open_file_direct_io(path, O_CREAT | O_RDWR | O_TRUNC, &file);
+    journalfile_v1_generate_path(datafile, path, sizeof(path));
+    fd = open_file_for_io(path, O_CREAT | O_RDWR | O_TRUNC, &file, use_direct_io);
     if (fd < 0) {
-        ++ctx->stats.fs_errors;
-        rrd_stat_atomic_add(&global_fs_errors, 1);
+        ctx_fs_error(ctx);
         return fd;
     }
     journalfile->file = file;
-    ++ctx->stats.journalfile_creations;
+    __atomic_add_fetch(&ctx->stats.journalfile_creations, 1, __ATOMIC_RELAXED);
 
     ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock));
     if (unlikely(ret)) {
-        fatal("posix_memalign:%s", strerror(ret));
+        fatal("DBENGINE: posix_memalign:%s", strerror(ret));
     }
     memset(superblock, 0, sizeof(*superblock));
     (void) strncpy(superblock->magic_number, RRDENG_JF_MAGIC, RRDENG_MAGIC_SZ);
@@ -220,25 +516,24 @@ int create_journal_file(struct rrdengine_journalfile *journalfile, struct rrdeng
     ret = uv_fs_write(NULL, &req, file, &iov, 1, 0, NULL);
     if (ret < 0) {
         fatal_assert(req.result < 0);
-        error("uv_fs_write: %s", uv_strerror(ret));
-        ++ctx->stats.io_errors;
-        rrd_stat_atomic_add(&global_io_errors, 1);
+        error("DBENGINE: uv_fs_write: %s", uv_strerror(ret));
+        ctx_io_error(ctx);
     }
     uv_fs_req_cleanup(&req);
     posix_memfree(superblock);
     if (ret < 0) {
-        destroy_journal_file(journalfile, datafile);
+        journalfile_destroy_unsafe(journalfile, datafile);
         return ret;
     }
 
-    journalfile->pos = sizeof(*superblock);
-    ctx->stats.io_write_bytes += sizeof(*superblock);
-    ++ctx->stats.io_write_requests;
+    journalfile->unsafe.pos = sizeof(*superblock);
+
+    ctx_io_write_op_bytes(ctx, sizeof(*superblock));
 
     return 0;
 }
 
-static int check_journal_file_superblock(uv_file file)
+static int journalfile_check_superblock(uv_file file)
 {
     int ret;
     struct rrdeng_jf_sb *superblock;
@@ -247,13 +542,13 @@ static int check_journal_file_superblock(uv_file file)
 
     ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock));
     if (unlikely(ret)) {
-        fatal("posix_memalign:%s", strerror(ret));
+        fatal("DBENGINE: posix_memalign:%s", strerror(ret));
     }
     iov = uv_buf_init((void *)superblock, sizeof(*superblock));
 
     ret = uv_fs_read(NULL, &req, file, &iov, 1, 0, NULL);
     if (ret < 0) {
-        error("uv_fs_read: %s", uv_strerror(ret));
+        error("DBENGINE: uv_fs_read: %s", uv_strerror(ret));
         uv_fs_req_cleanup(&req);
         goto error;
     }
@@ -262,7 +557,7 @@ static int check_journal_file_superblock(uv_file file)
 
     if (strncmp(superblock->magic_number, RRDENG_JF_MAGIC, RRDENG_MAGIC_SZ) ||
         strncmp(superblock->version, RRDENG_JF_VER, RRDENG_VER_SZ)) {
-        error("File has invalid superblock.");
+        error("DBENGINE: File has invalid superblock.");
         ret = UV_EINVAL;
     } else {
         ret = 0;
@@ -272,15 +567,10 @@ static int check_journal_file_superblock(uv_file file)
     return ret;
 }
 
-static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile,
-                                    void *buf, unsigned max_size)
+static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile, void *buf, unsigned max_size)
 {
     static BITMAP256 page_error_map;
-    struct page_cache *pg_cache = &ctx->pg_cache;
-    unsigned i, count, payload_length, descr_size, valid_pages;
-    struct rrdeng_page_descr *descr;
-    struct extent_info *extent;
-    /* persistent structures */
+    unsigned i, count, payload_length, descr_size;
     struct rrdeng_jf_store_data *jf_metric_data;
 
     jf_metric_data = buf;
@@ -288,117 +578,65 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden
     descr_size = sizeof(*jf_metric_data->descr) * count;
     payload_length = sizeof(*jf_metric_data) + descr_size;
     if (payload_length > max_size) {
-        error("Corrupted transaction payload.");
+        error("DBENGINE: corrupted transaction payload.");
         return;
     }
 
-    extent = mallocz(sizeof(*extent) + count * sizeof(extent->pages[0]));
-    extent->offset = jf_metric_data->extent_offset;
-    extent->size = jf_metric_data->extent_size;
-    extent->datafile = journalfile->datafile;
-    extent->next = NULL;
-
-    for (i = 0, valid_pages = 0 ; i < count ; ++i) {
+    time_t now_s = max_acceptable_collected_time();
+    for (i = 0; i < count ; ++i) {
         uuid_t *temp_id;
-        Pvoid_t *PValue;
-        struct pg_cache_page_index *page_index = NULL;
         uint8_t page_type = jf_metric_data->descr[i].type;
 
         if (page_type > PAGE_TYPE_MAX) {
             if (!bitmap256_get_bit(&page_error_map, page_type)) {
-                error("Unknown page type %d encountered.", page_type);
+                error("DBENGINE: unknown page type %d encountered.", page_type);
                 bitmap256_set_bit(&page_error_map, page_type, 1);
             }
             continue;
         }
-        uint64_t start_time_ut = jf_metric_data->descr[i].start_time_ut;
-        uint64_t end_time_ut = jf_metric_data->descr[i].end_time_ut;
-        size_t entries = jf_metric_data->descr[i].page_length / page_type_size[page_type];
-        time_t update_every_s = (entries > 1) ? ((end_time_ut - start_time_ut) / USEC_PER_SEC / (entries - 1)) : 0;
-
-        if (unlikely(start_time_ut > end_time_ut)) {
-            ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].counter++;
-            if(ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].latest_end_time_ut < end_time_ut)
-                ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].latest_end_time_ut = end_time_ut;
-            continue;
-        }
 
-        if (unlikely(start_time_ut == end_time_ut && entries != 1)) {
-            ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].counter++;
-            if(ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].latest_end_time_ut < end_time_ut)
-                ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].latest_end_time_ut = end_time_ut;
-            continue;
-        }
-
-        if (unlikely(!entries)) {
-            ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].counter++;
-            if(ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].latest_end_time_ut < end_time_ut)
-                ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].latest_end_time_ut = end_time_ut;
-            continue;
-        }
+        temp_id = (uuid_t *)jf_metric_data->descr[i].uuid;
+        METRIC *metric = mrg_metric_get_and_acquire(main_mrg, temp_id, (Word_t) ctx);
 
-        if(entries > 1 && update_every_s == 0) {
-            ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].counter++;
-            if(ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].latest_end_time_ut < end_time_ut)
-                ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].latest_end_time_ut = end_time_ut;
-            continue;
-        }
+        struct rrdeng_extent_page_descr *descr = &jf_metric_data->descr[i];
+        VALIDATED_PAGE_DESCRIPTOR vd = validate_extent_page_descr(
+                descr, now_s,
+                (metric) ? mrg_metric_get_update_every_s(main_mrg, metric) : 0,
+                false);
 
-        if(start_time_ut + update_every_s * USEC_PER_SEC * (entries - 1) != end_time_ut) {
-            ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].counter++;
-            if(ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].latest_end_time_ut < end_time_ut)
-                ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].latest_end_time_ut = end_time_ut;
+        if(!vd.is_valid) {
+            if(metric)
+                mrg_metric_release(main_mrg, metric);
 
-            // let this be
-            // end_time_ut = start_time_ut + update_every_s * USEC_PER_SEC * (entries - 1);
+            continue;
         }
 
-        temp_id = (uuid_t *)jf_metric_data->descr[i].uuid;
-
-        uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
-        PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, temp_id, sizeof(uuid_t));
-        if (likely(NULL != PValue)) {
-            page_index = *PValue;
-        }
-        uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
-        if (NULL == PValue) {
-            /* First time we see the UUID */
-            uv_rwlock_wrlock(&pg_cache->metrics_index.lock);
-            PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, temp_id, sizeof(uuid_t), PJE0);
-            fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */
-            *PValue = page_index = create_page_index(temp_id, ctx);
-            page_index->prev = pg_cache->metrics_index.last_page_index;
-            pg_cache->metrics_index.last_page_index = page_index;
-            uv_rwlock_wrunlock(&pg_cache->metrics_index.lock);
+        bool update_metric_time = true;
+        if (!metric) {
+            MRG_ENTRY entry = {
+                    .section = (Word_t)ctx,
+                    .first_time_s = vd.start_time_s,
+                    .last_time_s = vd.end_time_s,
+                    .latest_update_every_s = vd.update_every_s,
+            };
+            uuid_copy(entry.uuid, *temp_id);
+
+            bool added;
+            metric = mrg_metric_add_and_acquire(main_mrg, entry, &added);
+            if(added)
+                update_metric_time = false;
         }
+        Word_t metric_id = mrg_metric_id(main_mrg, metric);
 
-        descr = pg_cache_create_descr();
-        descr->page_length = jf_metric_data->descr[i].page_length;
-        descr->start_time_ut = start_time_ut;
-        descr->end_time_ut = end_time_ut;
-        descr->update_every_s = (update_every_s > 0) ? (uint32_t)update_every_s : (page_index->latest_update_every_s);
-        descr->id = &page_index->id;
-        descr->extent = extent;
-        descr->type = page_type;
-        extent->pages[valid_pages++] = descr;
-        pg_cache_insert(ctx, page_index, descr);
+        if (update_metric_time)
+            mrg_metric_expand_retention(main_mrg, metric, vd.start_time_s, vd.end_time_s, vd.update_every_s);
 
-        if(page_index->latest_time_ut == descr->end_time_ut)
-            page_index->latest_update_every_s = descr->update_every_s;
+        pgc_open_add_hot_page(
+                (Word_t)ctx, metric_id, vd.start_time_s, vd.end_time_s, vd.update_every_s,
+                journalfile->datafile,
+                jf_metric_data->extent_offset, jf_metric_data->extent_size, jf_metric_data->descr[i].page_length);
 
-        if(descr->update_every_s == 0)
-            fatal(
-                "DBENGINE: page descriptor update every is zero, end_time_ut = %llu, start_time_ut = %llu, entries = %zu",
-                (unsigned long long)end_time_ut, (unsigned long long)start_time_ut, entries);
-    }
-
-    extent->number_of_pages = valid_pages;
-
-    if (likely(valid_pages))
-        df_extent_insert(extent);
-    else {
-        freez(extent);
-        ctx->load_errors[LOAD_ERRORS_DROPPED_EXTENT].counter++;
+        mrg_metric_release(main_mrg, metric);
     }
 }
 
@@ -407,8 +645,8 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden
  * Sets id to the current transaction id or to 0 if unknown.
  * Returns size of transaction record or 0 for unknown size.
  */
-static unsigned replay_transaction(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile,
-                                   void *buf, uint64_t *id, unsigned max_size)
+static unsigned journalfile_replay_transaction(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile,
+                                               void *buf, uint64_t *id, unsigned max_size)
 {
     unsigned payload_length, size_bytes;
     int ret;
@@ -424,14 +662,14 @@ static unsigned replay_transaction(struct rrdengine_instance *ctx, struct rrdeng
         return 0;
     }
     if (sizeof(*jf_header) > max_size) {
-        error("Corrupted transaction record, skipping.");
+        error("DBENGINE: corrupted transaction record, skipping.");
         return 0;
     }
     *id = jf_header->id;
     payload_length = jf_header->payload_length;
     size_bytes = sizeof(*jf_header) + payload_length + sizeof(*jf_trailer);
     if (size_bytes > max_size) {
-        error("Corrupted transaction record, skipping.");
+        error("DBENGINE: corrupted transaction record, skipping.");
         return 0;
     }
     jf_trailer = buf + sizeof(*jf_header) + payload_length;
@@ -440,16 +678,16 @@ static unsigned replay_transaction(struct rrdengine_instance *ctx, struct rrdeng
     ret = crc32cmp(jf_trailer->checksum, crc);
     debug(D_RRDENGINE, "Transaction %"PRIu64" was read from disk. CRC32 check: %s", *id, ret ? "FAILED" : "SUCCEEDED");
     if (unlikely(ret)) {
-        error("Transaction %"PRIu64" was read from disk. CRC32 check: FAILED", *id);
+        error("DBENGINE: transaction %"PRIu64" was read from disk. CRC32 check: FAILED", *id);
         return size_bytes;
     }
     switch (jf_header->type) {
     case STORE_DATA:
         debug(D_RRDENGINE, "Replaying transaction %"PRIu64"", jf_header->id);
-        restore_extent_metadata(ctx, journalfile, buf + sizeof(*jf_header), payload_length);
+            journalfile_restore_extent_metadata(ctx, journalfile, buf + sizeof(*jf_header), payload_length);
         break;
     default:
-        error("Unknown transaction type. Skipping record.");
+        error("DBENGINE: unknown transaction type, skipping record.");
         break;
     }
 
@@ -463,10 +701,10 @@ static unsigned replay_transaction(struct rrdengine_instance *ctx, struct rrdeng
  * Page cache must already be initialized.
  * Returns the maximum transaction id it discovered.
  */
-static uint64_t iterate_transactions(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile)
+static uint64_t journalfile_iterate_transactions(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile)
 {
     uv_file file;
-    uint64_t file_size;//, data_file_size;
+    uint64_t file_size;
     int ret;
     uint64_t pos, pos_i, max_id, id;
     unsigned size_bytes;
@@ -475,39 +713,31 @@ static uint64_t iterate_transactions(struct rrdengine_instance *ctx, struct rrde
     uv_fs_t req;
 
     file = journalfile->file;
-    file_size = journalfile->pos;
-    //data_file_size = journalfile->datafile->pos; TODO: utilize this?
+    file_size = journalfile->unsafe.pos;
 
     max_id = 1;
-    bool journal_is_mmapped = (journalfile->data != NULL);
-    if (unlikely(!journal_is_mmapped)) {
-        ret = posix_memalign((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES);
-        if (unlikely(ret))
-            fatal("posix_memalign:%s", strerror(ret));
-    }
-    else
-        buf = journalfile->data +  sizeof(struct rrdeng_jf_sb);
-    for (pos = sizeof(struct rrdeng_jf_sb) ; pos < file_size ; pos += READAHEAD_BYTES) {
+    ret = posix_memalign((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES);
+    if (unlikely(ret))
+        fatal("DBENGINE: posix_memalign:%s", strerror(ret));
+
+    for (pos = sizeof(struct rrdeng_jf_sb); pos < file_size; pos += READAHEAD_BYTES) {
         size_bytes = MIN(READAHEAD_BYTES, file_size - pos);
-        if (unlikely(!journal_is_mmapped)) {
-            iov = uv_buf_init(buf, size_bytes);
-            ret = uv_fs_read(NULL, &req, file, &iov, 1, pos, NULL);
-            if (ret < 0) {
-                error("uv_fs_read: pos=%" PRIu64 ", %s", pos, uv_strerror(ret));
-                uv_fs_req_cleanup(&req);
-                goto skip_file;
-            }
-            fatal_assert(req.result >= 0);
+        iov = uv_buf_init(buf, size_bytes);
+        ret = uv_fs_read(NULL, &req, file, &iov, 1, pos, NULL);
+        if (ret < 0) {
+            error("DBENGINE: uv_fs_read: pos=%" PRIu64 ", %s", pos, uv_strerror(ret));
             uv_fs_req_cleanup(&req);
-            ++ctx->stats.io_read_requests;
-            ctx->stats.io_read_bytes += size_bytes;
+            goto skip_file;
         }
+        fatal_assert(req.result >= 0);
+        uv_fs_req_cleanup(&req);
+        ctx_io_read_op_bytes(ctx, size_bytes);
 
-        for (pos_i = 0 ; pos_i < size_bytes ; ) {
+        for (pos_i = 0; pos_i < size_bytes;) {
             unsigned max_size;
 
             max_size = pos + size_bytes - pos_i;
-            ret = replay_transaction(ctx, journalfile, buf + pos_i, &id, max_size);
+            ret = journalfile_replay_transaction(ctx, journalfile, buf + pos_i, &id, max_size);
             if (!ret) /* TODO: support transactions bigger than 4K */
                 /* unknown transaction size, move on to the next block */
                 pos_i = ALIGN_BYTES_FLOOR(pos_i + RRDENG_BLOCK_SIZE);
@@ -515,73 +745,722 @@ static uint64_t iterate_transactions(struct rrdengine_instance *ctx, struct rrde
                 pos_i += ret;
             max_id = MAX(max_id, id);
         }
-        if (likely(journal_is_mmapped))
-            buf += size_bytes;
     }
 skip_file:
-    if (unlikely(!journal_is_mmapped))
-        posix_memfree(buf);
+    posix_memfree(buf);
     return max_id;
 }
 
-int load_journal_file(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile,
-                      struct rrdengine_datafile *datafile)
+// Checks that the extent list checksum is valid
+static int journalfile_check_v2_extent_list (void *data_start, size_t file_size)
+{
+    UNUSED(file_size);
+    uLong crc;
+
+    struct journal_v2_header *j2_header = (void *) data_start;
+    struct journal_v2_block_trailer *journal_v2_trailer;
+
+    journal_v2_trailer = (struct journal_v2_block_trailer *) ((uint8_t *) data_start + j2_header->extent_trailer_offset);
+    crc = crc32(0L, Z_NULL, 0);
+    crc = crc32(crc, (uint8_t *) data_start + j2_header->extent_offset, j2_header->extent_count * sizeof(struct journal_extent_list));
+    if (unlikely(crc32cmp(journal_v2_trailer->checksum, crc))) {
+        error("DBENGINE: extent list CRC32 check: FAILED");
+        return 1;
+    }
+
+    return 0;
+}
+
+// Checks that the metric list (UUIDs) checksum is valid
+static int journalfile_check_v2_metric_list(void *data_start, size_t file_size)
+{
+    UNUSED(file_size);
+    uLong crc;
+
+    struct journal_v2_header *j2_header = (void *) data_start;
+    struct journal_v2_block_trailer *journal_v2_trailer;
+
+    journal_v2_trailer = (struct journal_v2_block_trailer *) ((uint8_t *) data_start + j2_header->metric_trailer_offset);
+    crc = crc32(0L, Z_NULL, 0);
+    crc = crc32(crc, (uint8_t *) data_start + j2_header->metric_offset, j2_header->metric_count * sizeof(struct journal_metric_list));
+    if (unlikely(crc32cmp(journal_v2_trailer->checksum, crc))) {
+        error("DBENGINE: metric list CRC32 check: FAILED");
+        return 1;
+    }
+    return 0;
+}
+
+//
+// Return
+//   0 Ok
+//   1 Invalid
+//   2 Force rebuild
+//   3 skip
+
+static int journalfile_v2_validate(void *data_start, size_t journal_v2_file_size, size_t journal_v1_file_size)
+{
+    int rc;
+    uLong crc;
+
+    struct journal_v2_header *j2_header = (void *) data_start;
+    struct journal_v2_block_trailer *journal_v2_trailer;
+
+    if (j2_header->magic == JOURVAL_V2_REBUILD_MAGIC)
+        return 2;
+
+    if (j2_header->magic == JOURVAL_V2_SKIP_MAGIC)
+        return 3;
+
+    // Magic failure
+    if (j2_header->magic != JOURVAL_V2_MAGIC)
+        return 1;
+
+    if (j2_header->journal_v2_file_size != journal_v2_file_size)
+        return 1;
+
+    if (journal_v1_file_size && j2_header->journal_v1_file_size != journal_v1_file_size)
+        return 1;
+
+    journal_v2_trailer = (struct journal_v2_block_trailer *) ((uint8_t *) data_start + journal_v2_file_size - sizeof(*journal_v2_trailer));
+
+    crc = crc32(0L, Z_NULL, 0);
+    crc = crc32(crc, (void *) j2_header, sizeof(*j2_header));
+
+    rc = crc32cmp(journal_v2_trailer->checksum, crc);
+    if (unlikely(rc)) {
+        error("DBENGINE: file CRC32 check: FAILED");
+        return 1;
+    }
+
+    rc = journalfile_check_v2_extent_list(data_start, journal_v2_file_size);
+    if (rc) return 1;
+
+    rc = journalfile_check_v2_metric_list(data_start, journal_v2_file_size);
+    if (rc) return 1;
+
+    if (!db_engine_journal_check)
+        return 0;
+
+    // Verify complete UUID chain
+
+    struct journal_metric_list *metric = (void *) (data_start + j2_header->metric_offset);
+
+    unsigned verified = 0;
+    unsigned entries;
+    unsigned total_pages = 0;
+
+    info("DBENGINE: checking %u metrics that exist in the journal", j2_header->metric_count);
+    for (entries = 0; entries < j2_header->metric_count; entries++) {
+
+        char uuid_str[UUID_STR_LEN];
+        uuid_unparse_lower(metric->uuid, uuid_str);
+        struct journal_page_header *metric_list_header = (void *) (data_start + metric->page_offset);
+        struct journal_page_header local_metric_list_header = *metric_list_header;
+
+        local_metric_list_header.crc = JOURVAL_V2_MAGIC;
+
+        crc = crc32(0L, Z_NULL, 0);
+        crc = crc32(crc, (void *) &local_metric_list_header, sizeof(local_metric_list_header));
+        rc = crc32cmp(metric_list_header->checksum, crc);
+
+        if (!rc) {
+            struct journal_v2_block_trailer *journal_trailer =
+                (void *) data_start + metric->page_offset + sizeof(struct journal_page_header) + (metric_list_header->entries * sizeof(struct journal_page_list));
+
+            crc = crc32(0L, Z_NULL, 0);
+            crc = crc32(crc, (uint8_t *) metric_list_header + sizeof(struct journal_page_header), metric_list_header->entries * sizeof(struct journal_page_list));
+            rc = crc32cmp(journal_trailer->checksum, crc);
+            internal_error(rc, "DBENGINE: index %u : %s entries %u at offset %u verified, DATA CRC computed %lu, stored %u", entries, uuid_str, metric->entries, metric->page_offset,
+                           crc, metric_list_header->crc);
+            if (!rc) {
+                total_pages += metric_list_header->entries;
+                verified++;
+            }
+        }
+
+        metric++;
+        if ((uint32_t)((uint8_t *) metric - (uint8_t *) data_start) > (uint32_t) journal_v2_file_size) {
+            info("DBENGINE: verification failed EOF reached -- total entries %u, verified %u", entries, verified);
+            return 1;
+        }
+    }
+
+    if (entries != verified) {
+        info("DBENGINE: verification failed -- total entries %u, verified %u", entries, verified);
+        return 1;
+    }
+    info("DBENGINE: verification succeeded -- total entries %u, verified %u (%u total pages)", entries, verified, total_pages);
+
+    return 0;
+}
+
+void journalfile_v2_populate_retention_to_mrg(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile) {
+    usec_t started_ut = now_monotonic_usec();
+
+    size_t data_size = 0;
+    struct journal_v2_header *j2_header = journalfile_v2_data_acquire(journalfile, &data_size, 0, 0);
+    if(!j2_header)
+        return;
+
+    uint8_t *data_start = (uint8_t *)j2_header;
+    uint32_t entries = j2_header->metric_count;
+
+    struct journal_metric_list *metric = (struct journal_metric_list *) (data_start + j2_header->metric_offset);
+    time_t header_start_time_s  = (time_t) (j2_header->start_time_ut / USEC_PER_SEC);
+    time_t now_s = max_acceptable_collected_time();
+    for (size_t i=0; i < entries; i++) {
+        time_t start_time_s = header_start_time_s + metric->delta_start_s;
+        time_t end_time_s = header_start_time_s + metric->delta_end_s;
+        time_t update_every_s = (metric->entries > 1) ? ((end_time_s - start_time_s) / (entries - 1)) : 0;
+        update_metric_retention_and_granularity_by_uuid(
+                ctx, &metric->uuid, start_time_s, end_time_s, update_every_s, now_s);
+
+#ifdef NETDATA_INTERNAL_CHECKS
+        struct journal_page_header *metric_list_header = (void *) (data_start + metric->page_offset);
+        fatal_assert(uuid_compare(metric_list_header->uuid, metric->uuid) == 0);
+        fatal_assert(metric->entries == metric_list_header->entries);
+#endif
+        metric++;
+    }
+
+    journalfile_v2_data_release(journalfile);
+    usec_t ended_ut = now_monotonic_usec();
+
+    info("DBENGINE: journal v2 of tier %d, datafile %u populated, size: %0.2f MiB, metrics: %0.2f k, %0.2f ms"
+        , ctx->config.tier, journalfile->datafile->fileno
+        , (double)data_size / 1024 / 1024
+        , (double)entries / 1000
+        , ((double)(ended_ut - started_ut) / USEC_PER_MS)
+        );
+}
+
+int journalfile_v2_load(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
+{
+    int ret, fd;
+    char path_v1[RRDENG_PATH_MAX];
+    char path_v2[RRDENG_PATH_MAX];
+    struct stat statbuf;
+    size_t journal_v1_file_size = 0;
+    size_t journal_v2_file_size;
+
+    journalfile_v1_generate_path(datafile, path_v1, sizeof(path_v1));
+    ret = stat(path_v1, &statbuf);
+    if (!ret)
+        journal_v1_file_size = (uint32_t)statbuf.st_size;
+
+    journalfile_v2_generate_path(datafile, path_v2, sizeof(path_v2));
+    fd = open(path_v2, O_RDONLY);
+    if (fd < 0) {
+        if (errno == ENOENT)
+            return 1;
+        ctx_fs_error(ctx);
+        error("DBENGINE: failed to open '%s'", path_v2);
+        return 1;
+    }
+
+    ret = fstat(fd, &statbuf);
+    if (ret) {
+        error("DBENGINE: failed to get file information for '%s'", path_v2);
+        close(fd);
+        return 1;
+    }
+
+    journal_v2_file_size = (size_t)statbuf.st_size;
+
+    if (journal_v2_file_size < sizeof(struct journal_v2_header)) {
+        error_report("Invalid file %s. Not the expected size", path_v2);
+        close(fd);
+        return 1;
+    }
+
+    usec_t mmap_start_ut = now_monotonic_usec();
+    uint8_t *data_start = mmap(NULL, journal_v2_file_size, PROT_READ, MAP_SHARED, fd, 0);
+    if (data_start == MAP_FAILED) {
+        close(fd);
+        return 1;
+    }
+
+    info("DBENGINE: checking integrity of '%s'", path_v2);
+    usec_t validation_start_ut = now_monotonic_usec();
+    int rc = journalfile_v2_validate(data_start, journal_v2_file_size, journal_v1_file_size);
+    if (unlikely(rc)) {
+        if (rc == 2)
+            error_report("File %s needs to be rebuilt", path_v2);
+        else if (rc == 3)
+            error_report("File %s will be skipped", path_v2);
+        else
+            error_report("File %s is invalid and it will be rebuilt", path_v2);
+
+        if (unlikely(munmap(data_start, journal_v2_file_size)))
+            error("DBENGINE: failed to unmap '%s'", path_v2);
+
+        close(fd);
+        return rc;
+    }
+
+    struct journal_v2_header *j2_header = (void *) data_start;
+    uint32_t entries = j2_header->metric_count;
+
+    if (unlikely(!entries)) {
+        if (unlikely(munmap(data_start, journal_v2_file_size)))
+            error("DBENGINE: failed to unmap '%s'", path_v2);
+
+        close(fd);
+        return 1;
+    }
+
+    usec_t finished_ut = now_monotonic_usec();
+
+    info("DBENGINE: journal v2 '%s' loaded, size: %0.2f MiB, metrics: %0.2f k, "
+         "mmap: %0.2f ms, validate: %0.2f ms"
+         , path_v2
+         , (double)journal_v2_file_size / 1024 / 1024
+         , (double)entries / 1000
+         , ((double)(validation_start_ut - mmap_start_ut) / USEC_PER_MS)
+         , ((double)(finished_ut - validation_start_ut) / USEC_PER_MS)
+         );
+
+    // Initialize the journal file to be able to access the data
+    journalfile_v2_data_set(journalfile, fd, data_start, journal_v2_file_size);
+
+    ctx_current_disk_space_increase(ctx, journal_v2_file_size);
+
+    // File is OK load it
+    return 0;
+}
+
+struct journal_metric_list_to_sort {
+    struct jv2_metrics_info *metric_info;
+};
+
+static int journalfile_metric_compare (const void *item1, const void *item2)
+{
+    const struct jv2_metrics_info *metric1 = ((struct journal_metric_list_to_sort *) item1)->metric_info;
+    const struct jv2_metrics_info *metric2 = ((struct journal_metric_list_to_sort *) item2)->metric_info;
+
+    return uuid_compare(*(metric1->uuid), *(metric2->uuid));
+}
+
+
+// Write list of extents for the journalfile
+void *journalfile_v2_write_extent_list(Pvoid_t JudyL_extents_pos, void *data)
+{
+    Pvoid_t *PValue;
+    struct journal_extent_list *j2_extent_base = (void *) data;
+    struct jv2_extents_info *ext_info;
+
+    bool first = true;
+    Word_t pos = 0;
+    size_t count = 0;
+    while ((PValue = JudyLFirstThenNext(JudyL_extents_pos, &pos, &first))) {
+        ext_info = *PValue;
+        size_t index = ext_info->index;
+        j2_extent_base[index].file_index = 0;
+        j2_extent_base[index].datafile_offset = ext_info->pos;
+        j2_extent_base[index].datafile_size = ext_info->bytes;
+        j2_extent_base[index].pages = ext_info->number_of_pages;
+        count++;
+    }
+    return j2_extent_base + count;
+}
+
+static int journalfile_verify_space(struct journal_v2_header *j2_header, void *data, uint32_t bytes)
+{
+    if ((unsigned long)(((uint8_t *) data - (uint8_t *)  j2_header->data) + bytes) > (j2_header->journal_v2_file_size - sizeof(struct journal_v2_block_trailer)))
+        return 1;
+
+    return 0;
+}
+
+void *journalfile_v2_write_metric_page(struct journal_v2_header *j2_header, void *data, struct jv2_metrics_info *metric_info, uint32_t pages_offset)
+{
+    struct journal_metric_list *metric = (void *) data;
+
+    if (journalfile_verify_space(j2_header, data, sizeof(*metric)))
+        return NULL;
+
+    uuid_copy(metric->uuid, *metric_info->uuid);
+    metric->entries = metric_info->number_of_pages;
+    metric->page_offset = pages_offset;
+    metric->delta_start_s = (uint32_t)(metric_info->first_time_s - (time_t)(j2_header->start_time_ut / USEC_PER_SEC));
+    metric->delta_end_s = (uint32_t)(metric_info->last_time_s - (time_t)(j2_header->start_time_ut / USEC_PER_SEC));
+
+    return ++metric;
+}
+
+void *journalfile_v2_write_data_page_header(struct journal_v2_header *j2_header __maybe_unused, void *data, struct jv2_metrics_info *metric_info, uint32_t uuid_offset)
+{
+    struct journal_page_header *data_page_header = (void *) data;
+    uLong crc;
+
+    uuid_copy(data_page_header->uuid, *metric_info->uuid);
+    data_page_header->entries = metric_info->number_of_pages;
+    data_page_header->uuid_offset = uuid_offset;        // data header OFFSET poings to METRIC in the directory
+    data_page_header->crc = JOURVAL_V2_MAGIC;
+    crc = crc32(0L, Z_NULL, 0);
+    crc = crc32(crc, (void *) data_page_header, sizeof(*data_page_header));
+    crc32set(data_page_header->checksum, crc);
+    return ++data_page_header;
+}
+
+void *journalfile_v2_write_data_page_trailer(struct journal_v2_header *j2_header __maybe_unused, void *data, void *page_header)
+{
+    struct journal_page_header *data_page_header = (void *) page_header;
+    struct journal_v2_block_trailer *journal_trailer = (void *) data;
+    uLong crc;
+
+    crc = crc32(0L, Z_NULL, 0);
+    crc = crc32(crc, (uint8_t *) page_header + sizeof(struct journal_page_header), data_page_header->entries * sizeof(struct journal_page_list));
+    crc32set(journal_trailer->checksum, crc);
+    return ++journal_trailer;
+}
+
+void *journalfile_v2_write_data_page(struct journal_v2_header *j2_header, void *data, struct jv2_page_info *page_info)
+{
+    struct journal_page_list *data_page = data;
+
+    if (journalfile_verify_space(j2_header, data, sizeof(*data_page)))
+        return NULL;
+
+    struct extent_io_data *ei = page_info->custom_data;
+
+    data_page->delta_start_s = (uint32_t) (page_info->start_time_s - (time_t) (j2_header->start_time_ut) / USEC_PER_SEC);
+    data_page->delta_end_s = (uint32_t) (page_info->end_time_s - (time_t) (j2_header->start_time_ut) / USEC_PER_SEC);
+    data_page->extent_index = page_info->extent_index;
+
+    data_page->update_every_s = page_info->update_every_s;
+    data_page->page_length = (uint16_t) (ei ? ei->page_length : page_info->page_length);
+    data_page->type = 0;
+
+    return ++data_page;
+}
+
+// Must be recorded in metric_info->entries
+void *journalfile_v2_write_descriptors(struct journal_v2_header *j2_header, void *data, struct jv2_metrics_info *metric_info)
+{
+    Pvoid_t *PValue;
+
+    struct journal_page_list *data_page = (void *)data;
+    // We need to write all descriptors with index metric_info->min_index_time_s, metric_info->max_index_time_s
+    // that belong to this journal file
+    Pvoid_t JudyL_array = metric_info->JudyL_pages_by_start_time;
+
+    Word_t index_time = 0;
+    bool first = true;
+    struct jv2_page_info *page_info;
+    while ((PValue = JudyLFirstThenNext(JudyL_array, &index_time, &first))) {
+        page_info = *PValue;
+        // Write one descriptor and return the next data page location
+        data_page = journalfile_v2_write_data_page(j2_header, (void *) data_page, page_info);
+        if (NULL == data_page)
+            break;
+    }
+    return data_page;
+}
+
+// Migrate the journalfile pointed by datafile
+// activate : make the new file active immediately
+//            journafile data will be set and descriptors (if deleted) will be repopulated as needed
+// startup  : if the migration is done during agent startup
+//            this will allow us to optimize certain things
+
+void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno __maybe_unused, uint8_t type __maybe_unused,
+                                        Pvoid_t JudyL_metrics, Pvoid_t JudyL_extents_pos,
+                                        size_t number_of_extents, size_t number_of_metrics, size_t number_of_pages, void *user_data)
+{
+    char path[RRDENG_PATH_MAX];
+    Pvoid_t *PValue;
+    struct rrdengine_instance *ctx = (struct rrdengine_instance *) section;
+    struct rrdengine_journalfile *journalfile = (struct rrdengine_journalfile *) user_data;
+    struct rrdengine_datafile *datafile = journalfile->datafile;
+    time_t min_time_s = LONG_MAX;
+    time_t max_time_s = 0;
+    struct jv2_metrics_info *metric_info;
+
+    journalfile_v2_generate_path(datafile, path, sizeof(path));
+
+    info("DBENGINE: indexing file '%s': extents %zu, metrics %zu, pages %zu",
+        path,
+        number_of_extents,
+        number_of_metrics,
+        number_of_pages);
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    usec_t start_loading = now_monotonic_usec();
+#endif
+
+    size_t total_file_size = 0;
+    total_file_size  += (sizeof(struct journal_v2_header) + JOURNAL_V2_HEADER_PADDING_SZ);
+
+    // Extents will start here
+    uint32_t extent_offset = total_file_size;
+    total_file_size  += (number_of_extents * sizeof(struct journal_extent_list));
+
+    uint32_t extent_offset_trailer = total_file_size;
+    total_file_size  += sizeof(struct journal_v2_block_trailer);
+
+    // UUID list will start here
+    uint32_t metrics_offset = total_file_size;
+    total_file_size  += (number_of_metrics * sizeof(struct journal_metric_list));
+
+    // UUID list trailer
+    uint32_t metric_offset_trailer = total_file_size;
+    total_file_size  += sizeof(struct journal_v2_block_trailer);
+
+    // descr @ time will start here
+    uint32_t pages_offset = total_file_size;
+    total_file_size  += (number_of_pages * (sizeof(struct journal_page_list) + sizeof(struct journal_page_header) + sizeof(struct journal_v2_block_trailer)));
+
+    // File trailer
+    uint32_t trailer_offset = total_file_size;
+    total_file_size  += sizeof(struct journal_v2_block_trailer);
+
+    int fd_v2;
+    uint8_t *data_start = netdata_mmap(path, total_file_size, MAP_SHARED, 0, false, &fd_v2);
+    uint8_t *data = data_start;
+
+    memset(data_start, 0, extent_offset);
+
+    // Write header
+    struct journal_v2_header j2_header;
+    memset(&j2_header, 0, sizeof(j2_header));
+
+    j2_header.magic = JOURVAL_V2_MAGIC;
+    j2_header.start_time_ut = 0;
+    j2_header.end_time_ut = 0;
+    j2_header.extent_count = number_of_extents;
+    j2_header.extent_offset = extent_offset;
+    j2_header.metric_count = number_of_metrics;
+    j2_header.metric_offset = metrics_offset;
+    j2_header.page_count = number_of_pages;
+    j2_header.page_offset = pages_offset;
+    j2_header.extent_trailer_offset = extent_offset_trailer;
+    j2_header.metric_trailer_offset = metric_offset_trailer;
+    j2_header.journal_v2_file_size = total_file_size;
+    j2_header.journal_v1_file_size = (uint32_t)journalfile_current_size(journalfile);
+    j2_header.data = data_start;                        // Used during migration
+
+    struct journal_v2_block_trailer *journal_v2_trailer;
+
+    data = journalfile_v2_write_extent_list(JudyL_extents_pos, data_start + extent_offset);
+    internal_error(true, "DBENGINE: write extent list so far %llu", (now_monotonic_usec() - start_loading) / USEC_PER_MS);
+
+    fatal_assert(data == data_start + extent_offset_trailer);
+
+    // Calculate CRC for extents
+    journal_v2_trailer = (struct journal_v2_block_trailer *) (data_start + extent_offset_trailer);
+    uLong crc;
+    crc = crc32(0L, Z_NULL, 0);
+    crc = crc32(crc, (uint8_t *) data_start + extent_offset, number_of_extents * sizeof(struct journal_extent_list));
+    crc32set(journal_v2_trailer->checksum, crc);
+
+    internal_error(true, "DBENGINE: CALCULATE CRC FOR EXTENT %llu", (now_monotonic_usec() - start_loading) / USEC_PER_MS);
+    // Skip the trailer, point to the metrics off
+    data += sizeof(struct journal_v2_block_trailer);
+
+    // Sanity check -- we must be at the metrics_offset
+    fatal_assert(data == data_start + metrics_offset);
+
+    // Allocate array to sort UUIDs and keep them sorted in the journal because we want to do binary search when we do lookups
+    struct journal_metric_list_to_sort *uuid_list = mallocz(number_of_metrics * sizeof(struct journal_metric_list_to_sort));
+
+    Word_t Index = 0;
+    size_t count = 0;
+    bool first_then_next = true;
+    while ((PValue = JudyLFirstThenNext(JudyL_metrics, &Index, &first_then_next))) {
+        metric_info = *PValue;
+
+        fatal_assert(count < number_of_metrics);
+        uuid_list[count++].metric_info = metric_info;
+        min_time_s = MIN(min_time_s, metric_info->first_time_s);
+        max_time_s = MAX(max_time_s, metric_info->last_time_s);
+    }
+
+    // Store in the header
+    j2_header.start_time_ut = min_time_s * USEC_PER_SEC;
+    j2_header.end_time_ut = max_time_s * USEC_PER_SEC;
+
+    qsort(&uuid_list[0], number_of_metrics, sizeof(struct journal_metric_list_to_sort), journalfile_metric_compare);
+    internal_error(true, "DBENGINE: traverse and qsort  UUID %llu", (now_monotonic_usec() - start_loading) / USEC_PER_MS);
+
+    uint32_t resize_file_to = total_file_size;
+
+    for (Index = 0; Index < number_of_metrics; Index++) {
+        metric_info = uuid_list[Index].metric_info;
+
+        // Calculate current UUID offset from start of file. We will store this in the data page header
+        uint32_t uuid_offset = data - data_start;
+
+        // Write the UUID we are processing
+        data  = (void *) journalfile_v2_write_metric_page(&j2_header, data, metric_info, pages_offset);
+        if (unlikely(!data))
+            break;
+
+        // Next we will write
+        //   Header
+        //   Detailed entries (descr @ time)
+        //   Trailer (checksum)
+
+        // Keep the page_list_header, to be used for migration when where agent is running
+        metric_info->page_list_header = pages_offset;
+        // Write page header
+        void *metric_page = journalfile_v2_write_data_page_header(&j2_header, data_start + pages_offset, metric_info,
+                                                                  uuid_offset);
+
+        // Start writing descr @ time
+        void *page_trailer = journalfile_v2_write_descriptors(&j2_header, metric_page, metric_info);
+        if (unlikely(!page_trailer))
+            break;
+
+        // Trailer (checksum)
+        uint8_t *next_page_address = journalfile_v2_write_data_page_trailer(&j2_header, page_trailer,
+                                                                            data_start + pages_offset);
+
+        // Calculate start of the pages start for next descriptor
+        pages_offset += (metric_info->number_of_pages * (sizeof(struct journal_page_list)) + sizeof(struct journal_page_header) + sizeof(struct journal_v2_block_trailer));
+        // Verify we are at the right location
+        if (pages_offset != (uint32_t)(next_page_address - data_start)) {
+            // make sure checks fail so that we abort
+            data = data_start;
+            break;
+        }
+    }
+
+    if (data == data_start + metric_offset_trailer) {
+        internal_error(true, "DBENGINE: WRITE METRICS AND PAGES  %llu", (now_monotonic_usec() - start_loading) / USEC_PER_MS);
+
+        // Calculate CRC for metrics
+        journal_v2_trailer = (struct journal_v2_block_trailer *)(data_start + metric_offset_trailer);
+        crc = crc32(0L, Z_NULL, 0);
+        crc =
+            crc32(crc, (uint8_t *)data_start + metrics_offset, number_of_metrics * sizeof(struct journal_metric_list));
+        crc32set(journal_v2_trailer->checksum, crc);
+        internal_error(true, "DBENGINE: CALCULATE CRC FOR UUIDs  %llu", (now_monotonic_usec() - start_loading) / USEC_PER_MS);
+
+        // Prepare to write checksum for the file
+        j2_header.data = NULL;
+        journal_v2_trailer = (struct journal_v2_block_trailer *)(data_start + trailer_offset);
+        crc = crc32(0L, Z_NULL, 0);
+        crc = crc32(crc, (void *)&j2_header, sizeof(j2_header));
+        crc32set(journal_v2_trailer->checksum, crc);
+
+        // Write header to the file
+        memcpy(data_start, &j2_header, sizeof(j2_header));
+
+        internal_error(true, "DBENGINE: FILE COMPLETED --------> %llu", (now_monotonic_usec() - start_loading) / USEC_PER_MS);
+
+        info("DBENGINE: migrated journal file '%s', file size %zu", path, total_file_size);
+
+        // msync(data_start, total_file_size, MS_SYNC);
+        journalfile_v2_data_set(journalfile, fd_v2, data_start, total_file_size);
+
+        internal_error(true, "DBENGINE: ACTIVATING NEW INDEX JNL %llu", (now_monotonic_usec() - start_loading) / USEC_PER_MS);
+        ctx_current_disk_space_increase(ctx, total_file_size);
+        freez(uuid_list);
+        return;
+    }
+    else {
+        info("DBENGINE: failed to build index '%s', file will be skipped", path);
+        j2_header.data = NULL;
+        j2_header.magic = JOURVAL_V2_SKIP_MAGIC;
+        memcpy(data_start, &j2_header, sizeof(j2_header));
+        resize_file_to = sizeof(j2_header);
+    }
+
+    netdata_munmap(data_start, total_file_size);
+    freez(uuid_list);
+
+    if (likely(resize_file_to == total_file_size))
+        return;
+
+    int ret = truncate(path, (long) resize_file_to);
+    if (ret < 0) {
+        ctx_current_disk_space_increase(ctx, total_file_size);
+        ctx_fs_error(ctx);
+        error("DBENGINE: failed to resize file '%s'", path);
+    }
+    else
+        ctx_current_disk_space_increase(ctx, resize_file_to);
+}
+
+int journalfile_load(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile,
+                     struct rrdengine_datafile *datafile)
 {
     uv_fs_t req;
     uv_file file;
     int ret, fd, error;
     uint64_t file_size, max_id;
     char path[RRDENG_PATH_MAX];
+    bool loaded_v2 = false;
+
+    // Do not try to load jv2 of the latest file
+    if (datafile->fileno != ctx_last_fileno_get(ctx))
+        loaded_v2 = journalfile_v2_load(ctx, journalfile, datafile) == 0;
 
-    generate_journalfilepath(datafile, path, sizeof(path));
-    fd = open_file_direct_io(path, O_RDWR, &file);
+    journalfile_v1_generate_path(datafile, path, sizeof(path));
+
+    fd = open_file_for_io(path, O_RDWR, &file, use_direct_io);
     if (fd < 0) {
-        ++ctx->stats.fs_errors;
-        rrd_stat_atomic_add(&global_fs_errors, 1);
+        ctx_fs_error(ctx);
+
+        if(loaded_v2)
+            return 0;
+
         return fd;
     }
-    info("Loading journal file \"%s\".", path);
 
     ret = check_file_properties(file, &file_size, sizeof(struct rrdeng_df_sb));
-    if (ret)
-        goto error;
-    file_size = ALIGN_BYTES_FLOOR(file_size);
+    if (ret) {
+        error = ret;
+        goto cleanup;
+    }
 
-    ret = check_journal_file_superblock(file);
-    if (ret)
-        goto error;
-    ctx->stats.io_read_bytes += sizeof(struct rrdeng_jf_sb);
-    ++ctx->stats.io_read_requests;
+    if(loaded_v2) {
+        journalfile->unsafe.pos = file_size;
+        error = 0;
+        goto cleanup;
+    }
 
+    file_size = ALIGN_BYTES_FLOOR(file_size);
+    journalfile->unsafe.pos = file_size;
     journalfile->file = file;
-    journalfile->pos = file_size;
-    journalfile->data = netdata_mmap(path, file_size, MAP_SHARED, 0);
-    info("Loading journal file \"%s\" using %s.", path, journalfile->data?"MMAP":"uv_fs_read");
 
-    max_id = iterate_transactions(ctx, journalfile);
+    ret = journalfile_check_superblock(file);
+    if (ret) {
+        info("DBENGINE: invalid journal file '%s' ; superblock check failed.", path);
+        error = ret;
+        goto cleanup;
+    }
+    ctx_io_read_op_bytes(ctx, sizeof(struct rrdeng_jf_sb));
+
+    info("DBENGINE: loading journal file '%s'", path);
 
-    ctx->commit_log.transaction_id = MAX(ctx->commit_log.transaction_id, max_id + 1);
+    max_id = journalfile_iterate_transactions(ctx, journalfile);
+
+    __atomic_store_n(&ctx->atomic.transaction_id, MAX(__atomic_load_n(&ctx->atomic.transaction_id, __ATOMIC_RELAXED), max_id + 1), __ATOMIC_RELAXED);
+
+    info("DBENGINE: journal file '%s' loaded (size:%"PRIu64").", path, file_size);
+
+    bool is_last_file = (ctx_last_fileno_get(ctx) == journalfile->datafile->fileno);
+    if (is_last_file && journalfile->datafile->pos <= rrdeng_target_data_file_size(ctx) / 3) {
+        ctx->loading.create_new_datafile_pair = false;
+        return 0;
+    }
+
+    pgc_open_cache_to_journal_v2(open_cache, (Word_t) ctx, (int) datafile->fileno, ctx->config.page_type,
+                                 journalfile_migrate_to_v2_callback, (void *) datafile->journalfile);
+
+    if (is_last_file)
+        ctx->loading.create_new_datafile_pair = true;
 
-    info("Journal file \"%s\" loaded (size:%"PRIu64").", path, file_size);
-    if (likely(journalfile->data))
-        netdata_munmap(journalfile->data, file_size);
     return 0;
 
-    error:
-    error = ret;
+cleanup:
     ret = uv_fs_close(NULL, &req, file, NULL);
     if (ret < 0) {
-        error("uv_fs_close(%s): %s", path, uv_strerror(ret));
-        ++ctx->stats.fs_errors;
-        rrd_stat_atomic_add(&global_fs_errors, 1);
+        error("DBENGINE: uv_fs_close(%s): %s", path, uv_strerror(ret));
+        ctx_fs_error(ctx);
     }
     uv_fs_req_cleanup(&req);
     return error;
 }
-
-void init_commit_log(struct rrdengine_instance *ctx)
-{
-    ctx->commit_log.buf = NULL;
-    ctx->commit_log.buf_pos = 0;
-    ctx->commit_log.transaction_id = 1;
-}
-- 
cgit v1.2.3