summaryrefslogtreecommitdiffstats
path: root/database/engine
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--database/engine/Makefile.am11
-rw-r--r--database/engine/README.md192
-rw-r--r--database/engine/cache.c2746
-rw-r--r--database/engine/cache.h250
-rw-r--r--database/engine/datafile.c611
-rw-r--r--database/engine/datafile.ksy74
-rw-r--r--database/engine/journalfile_v2.ksy.in150
-rw-r--r--database/engine/metric.c873
-rw-r--r--database/engine/pagecache.h62
-rw-r--r--database/engine/rrdenginelib.c161
-rw-r--r--src/database/engine/datafile.h (renamed from database/engine/datafile.h)0
-rw-r--r--src/database/engine/dbengine-diagram.xml (renamed from database/engine/dbengine-diagram.xml)0
-rw-r--r--src/database/engine/journalfile.c (renamed from database/engine/journalfile.c)59
-rw-r--r--src/database/engine/journalfile.h (renamed from database/engine/journalfile.h)5
-rw-r--r--src/database/engine/metric.h (renamed from database/engine/metric.h)23
-rw-r--r--src/database/engine/page.c (renamed from database/engine/page.c)108
-rw-r--r--src/database/engine/page.h (renamed from database/engine/page.h)0
-rw-r--r--src/database/engine/page_test.cc (renamed from database/engine/page_test.cc)0
-rw-r--r--src/database/engine/page_test.h (renamed from database/engine/page_test.h)0
-rw-r--r--src/database/engine/pagecache.c (renamed from database/engine/pagecache.c)26
-rw-r--r--src/database/engine/pdc.c (renamed from database/engine/pdc.c)97
-rw-r--r--src/database/engine/pdc.h (renamed from database/engine/pdc.h)0
-rw-r--r--src/database/engine/rrddiskprotocol.h (renamed from database/engine/rrddiskprotocol.h)16
-rw-r--r--src/database/engine/rrdengine.c (renamed from database/engine/rrdengine.c)348
-rw-r--r--src/database/engine/rrdengine.h (renamed from database/engine/rrdengine.h)77
-rwxr-xr-xsrc/database/engine/rrdengineapi.c (renamed from database/engine/rrdengineapi.c)380
-rw-r--r--src/database/engine/rrdengineapi.h (renamed from database/engine/rrdengineapi.h)60
-rw-r--r--src/database/engine/rrdenginelib.h (renamed from database/engine/rrdenginelib.h)2
-rw-r--r--src/go/collectors/go.d.plugin/agent/testdata/agent-empty.conf (renamed from database/engine/metadata_log/README.md)0
29 files changed, 760 insertions, 5571 deletions
diff --git a/database/engine/Makefile.am b/database/engine/Makefile.am
deleted file mode 100644
index 59250a997..000000000
--- a/database/engine/Makefile.am
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-AUTOMAKE_OPTIONS = subdir-objects
-MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
-
-SUBDIRS = \
- $(NULL)
-
-dist_noinst_DATA = \
- README.md \
- $(NULL)
diff --git a/database/engine/README.md b/database/engine/README.md
deleted file mode 100644
index 890018642..000000000
--- a/database/engine/README.md
+++ /dev/null
@@ -1,192 +0,0 @@
-# Database engine
-
-DBENGINE is the time-series database of Netdata.
-
-![image](https://user-images.githubusercontent.com/2662304/233838474-d4f8f0b9-61dc-4409-a708-97d403cd153a.png)
-
-## Design
-
-### Data Points
-
-**Data points** represent the collected values of metrics.
-
-A **data point** has:
-
-1. A **value**, the data collected for a metric. There is a special **value** to indicate that the collector failed to collect a valid value, and thus the data point is a **gap**.
-2. A **timestamp**, the time it has been collected.
-3. A **duration**, the time between this and the previous data collection.
-4. A flag which is set when machine-learning categorized the collected value as **anomalous** (an outlier based on the trained models).
-
-Using the **timestamp** and **duration**, Netdata calculates for each point its **start time**, **end time** and **update every**.
-
-For incremental metrics (counters), Netdata interpolates the collected values to align them to the expected **end time** at the microsecond level, absorbing data collection micro-latencies.
-
-When data points are stored in higher tiers (time aggregations - see [Tiers](#Tiers) below), each data point has:
-
-1. The **sum** of the original values that have been aggregated,
-2. The **count** of all the original values aggregated,
-3. The **minimum** value among them,
-4. The **maximum** value among them,
-5. Their **anomaly rate**, i.e. the count of values that were detected as outliers based on the currently trained models for the metric,
-6. A **timestamp**, which is equal to the **end time** of the last point aggregated,
-7. A **duration**, which is the duration between the **first time** of the first point aggregated to the **end time** of the last point aggregated.
-
-This design allows Netdata to accurately know the **average**, **minimum**, **maximum** and **anomaly rate** values even when using higher tiers to satisfy a query.
-
-### Pages
-Data points are organized into **pages**, i.e. segments of contiguous data collections of the same metric.
-
-Each page:
-
-1. Contains contiguous **data points** of a single metric.
-2. Contains **data points** having the same **update every**. If a metric changes **update every** on the fly, the page is flushed and a new one with the new **update every** is created. If a data collection is missed, a **gap point** is inserted into the page, so that the data points in a page remain contiguous.
-3. Has a **start time**, which is equivalent to the **end time** of the first data point stored into it,
-4. Has an **end time**, which is equal to the **end time** of the last data point stored into it,
-5. Has an **update every**, common for all points in the page.
-
-A **page** is a simple array of values. Each slot in the array has a **timestamp** implied by its position in the array, and each value stored represents the **data point** for that time, for the metric the page belongs to.
-
-This simple fixed step page design allows Netdata to collect several millions of points per second and pack all the values in a compact form with minimal metadata overhead.
-
-#### Hot Pages
-
-While a metric is collected, there is one **hot page** in memory for each of the configured tiers. Values collected for a metric are appended to its **hot page** until that page becomes full.
-
-#### Dirty Pages
-
-Once a **hot page** is full, it becomes a **dirty page**, and it is scheduled for immediate **flushing** (saving) to disk.
-
-#### Clean Pages
-
-Flushed (saved) pages are **clean pages**, i.e. read-only pages that reside primarily on disk, and are loaded on demand to satisfy data queries.
-
-#### Pages Configuration
-
-Pages are configured like this:
-
-| Attribute | Tier0 | Tier1 | Tier2 |
-|---------------------------------------------------------------------------------------|:-------------------------------------:|:---------------------------------------------------------------:|:---------------------------------------------------------------:|
-| Point Size in Memory, in Bytes | 4 | 16 | 16 |
-| Point Size on Disk, in Bytes<br/><small>after LZ4 compression, on the average</small> | 1 | 4 | 4 |
-| Page Size in Bytes | 4096<br/><small>2048 in 32bit</small> | 2048<br/><small>1024 in 32bit</small> | 384<br/><small>192 in 32bit</small> |
-| Collections per Point | 1 | 60x Tier0<br/><small>configurable in<br/>`netdata.conf`</small> | 60x Tier1<br/><small>configurable in<br/>`netdata.conf`</small> |
-| Points per Page | 1024<br/><small>512 in 32bit</small> | 128<br/><small>64 in 32bit</small> | 24<br/><small>12 in 32bit</small> |
-
-### Files
-
-To minimize the amount of data written to disk and the amount of storage required for storing metrics, Netdata aggregates up to 64 **dirty pages** of independent metrics, packs them all together into one bigger buffer, compresses this buffer with LZ4 (about 75% savings on the average) and commits a transaction to the disk files.
-
-#### Extents
-
-This collection of 64 pages that is packed and compressed together is called an **extent**. Netdata tries to store together, in the same **extent**, metrics that are meant to be "close". Dimensions of the same chart are such. They are usually queried together, so it is beneficial to have them in the same **extent** to read all of them at once at query time.
-
-#### Datafiles
-
-Multiple **extents** are appended to **datafiles** (filename suffix `.ndf`), until these **datafiles** become full. The size of each **datafile** is determined automatically by Netdata. The minimum for each **datafile** is 4MB and the maximum 512MB. Depending on the amount of disk space configured for each tier, Netdata will decide a **datafile** size trying to maintain about 50 datafiles for the whole database, within the limits mentioned (4MB min, 512MB max per file). The maximum number of datafiles supported is 65536, and therefore the maximum database size (per tier) that Netdata can support is 32TB.
-
-#### Journal Files
-
-Each **datafile** has two **journal files** with metadata related to the stored data in the **datafile**.
-
-- **journal file v1**, with filename suffix `.njf`, holds information about the transactions in its **datafile** and provides the ability to recover as much data as possible, in case either the datafile or the journal files get corrupted. This journal file has a maximum transaction size of 4KB, so in case data are corrupted on disk transactions of 4KB are lost. Each transaction holds the metadata of one **extent** (this is why DBENGINE supports up to 64 pages per extent).
-
-- **journal file v2**, with filename suffix `.njfv2`, which is a disk-based index for all the **pages** and **extents**. This file is memory mapped at runtime and is consulted to find where the data of a metric are in the datafile. This journal file is automatically re-created from **journal file v1** if it is missing. It is safe to delete these files (when Netdata does not run). Netdata will re-create them on the next run. Journal files v2 are supported in Netdata Agents with version `netdata-1.37.0-115-nightly`. Older versions maintain the journal index in memory.
-
-#### Database Rotation
-
-Database rotation is achieved by deleting the oldest **datafile** (and its journals) and creating a new one (with its journals).
-
-Data on disk are append-only. There is no way to delete, add, or update data in the middle of the database. If data are not useful for whatever reason, Netdata can be instructed to ignore these data. They will eventually be deleted from disk when the database is rotated. New data are always appended.
-
-#### Tiers
-
-Tiers are supported in Netdata Agents with version `netdata-1.35.0.138.nightly` and greater.
-
-**datafiles** and **journal files** are organized in **tiers**. All tiers share the same metrics and same collected values.
-
-- **tier 0** is the high resolution tier that stores the collected data at the frequency they are collected.
-- **tier 1** by default aggregates 60 values of **tier 0**.
-- **tier 2** by default aggregates 60 values of **tier 1**, or 3600 values of **tier 0**.
-
-Updating the higher **tiers** is automated, and it happens in real-time while data are being collected for **tier 0**.
-
-When the Netdata Agent starts, during the first data collection of each metric, higher tiers are automatically **backfilled** with
-data from lower tiers, so that the aggregation they provide will be accurate.
-
-Configuring how the number of tiers and the disk space allocated to each tier is how you can
-[change how long netdata stores metrics](https://github.com/netdata/netdata/blob/master/docs/store/change-metrics-storage.md).
-
-### Data loss
-
-Until **hot pages** and **dirty pages** are **flushed** to disk they are at risk (e.g. due to a crash, or
-power failure), as they are stored only in memory.
-
-The supported way of ensuring high data availability is the use of Netdata Parents to stream the data in real-time to
-multiple other Netdata agents.
-
-## Memory requirements and retention
-
-See [change how long netdata stores metrics](https://github.com/netdata/netdata/blob/master/docs/store/change-metrics-storage.md)
-
-#### Exceptions
-
-Netdata has several protection mechanisms to prevent the use of more memory (than the above), by incrementally fetching data from disk and aggressively evicting old data to make room for new data, but still memory may grow beyond the above limit under the following conditions:
-
-1. The number of pages concurrently used in queries do not fit in the above size. This can happen when multiple queries of unreasonably long time-frames run on lower, higher resolution, tiers. The Netdata query planner attempts to avoid such situations by gradually loading pages, but still under extreme conditions the system may use more memory to satisfy these queries.
-
-2. The disks that host Netdata files are extremely slow for the workload required by the database so that data cannot be flushed to disk quickly to free memory. Netdata will automatically spawn more flushing workers in an attempt to parallelize and speed up flushing, but still if the disks cannot write the data quickly enough, they will remain in memory until they are written to disk.
-
-### Caches
-
-DBENGINE stores metric data to disk. To achieve high performance even under severe stress, it uses several layers of caches.
-
-#### Main Cache
-
-Stores page data. It is the primary storage of hot and dirty pages (before they are saved to disk), and its clean queue is the LRU cache for speeding up queries.
-
-The entire DBENGINE is designed to use the hot queue size (the currently collected metrics) as the key for sizing all its memory consumption. We call this feature **memory ballooning**. More collected metrics, bigger main cache and vice versa.
-
-In the equation:
-
-```
-memory in KiB = METRICS x (TIERS - 1) x 4KiB x 2 + 32768 KiB
-```
-
-the part `METRICS x (TIERS - 1) x 4KiB` is an estimate for the max hot size of the main cache. Tier 0 pages are 4KiB, but tier 1 pages are 2 KiB and tier 2 pages are 384 bytes. So a single metric in 3 tiers uses 4096 + 2048 + 384 = 6528 bytes. The equation estimates 8192 per metric, which includes cache internal structures and leaves some spare.
-
-Then `x 2` is the worst case estimate for the dirty queue. If all collected metrics (hot) become available for saving at once, to avoid stopping data collection all their pages will become dirty and new hot pages will be created instantly. To save memory, when Netdata starts, DBENGINE allocates randomly smaller pages for metrics, to spread their completion evenly across time.
-
-The memory we saved with the above is used to improve the LRU cache. So, although we reserved 32MiB for the LRU, in bigger setups (Netdata Parents) the LRU grows a lot more, within the limits of the equation.
-
-In practice, the main cache sizes itself with `hot x 1.5` instead of `hot x 2`. The reason is that 5% of main cache is reserved for expanding open cache, 5% for expanding extent cache and we need room for the extensive buffers that are allocated in these setups. When the main cache exceeds `hot x 1.5` it enters a mode of critical evictions, and aggressively frees pages from the LRU to maintain a healthy memory footprint within its design limits.
-
-#### Open Cache
-
-Stores metadata about on disk pages. Not the data itself. Only metadata about the location of the data on disk.
-
-Its primary use is to index information about the open datafile, the one that still accepts new pages. Once that datafile becomes full, all the hot pages of the open cache are indexed in journal v2 files.
-
-The clean queue is an LRU for reducing the journal v2 scans during querying.
-
-Open cache uses memory ballooning too, like the main cache, based on its own hot pages. Open cache hot size is mainly controlled by the size of the open datafile. This is why on netdata versions with journal files v2, we decreased the maximum datafile size from 1GB to 512MB and we increased the target number of datafiles from 20 to 50.
-
-On bigger setups open cache will get a bigger LRU by automatically sizing it (the whole open cache) to 5% of the size of (the whole) main cache.
-
-#### Extent Cache
-
-Caches compressed **extent** data, to avoid repeatedly reading the same data from disks.
-
-
-### Shared Memory
-
-Journal v2 indexes are mapped into memory. Netdata attempts to minimize shared memory use by instructing the kernel about the use of these files, or even unmounting them when they are not needed.
-
-The time-ranges of the queries running control the amount of shared memory required.
-
-## Metrics Registry
-
-DBENGINE uses 150 bytes of memory for every metric for which retention is maintained but is not currently being collected.
-
-
-
-
diff --git a/database/engine/cache.c b/database/engine/cache.c
deleted file mode 100644
index eb1c35298..000000000
--- a/database/engine/cache.c
+++ /dev/null
@@ -1,2746 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-#include "cache.h"
-
-/* STATES AND TRANSITIONS
- *
- * entry | entry
- * v v
- * HOT -> DIRTY --> CLEAN --> EVICT
- * v | v
- * flush | evict
- * v | v
- * save | free
- * callback | callback
- *
- */
-
-typedef int32_t REFCOUNT;
-#define REFCOUNT_DELETING (-100)
-
-// to use ARAL uncomment the following line:
-#define PGC_WITH_ARAL 1
-
-typedef enum __attribute__ ((__packed__)) {
- // mutually exclusive flags
- PGC_PAGE_CLEAN = (1 << 0), // none of the following
- PGC_PAGE_DIRTY = (1 << 1), // contains unsaved data
- PGC_PAGE_HOT = (1 << 2), // currently being collected
-
- // flags related to various actions on each page
- PGC_PAGE_IS_BEING_DELETED = (1 << 3),
- PGC_PAGE_IS_BEING_MIGRATED_TO_V2 = (1 << 4),
- PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES = (1 << 5),
- PGC_PAGE_HAS_BEEN_ACCESSED = (1 << 6),
-} PGC_PAGE_FLAGS;
-
-#define page_flag_check(page, flag) (__atomic_load_n(&((page)->flags), __ATOMIC_ACQUIRE) & (flag))
-#define page_flag_set(page, flag) __atomic_or_fetch(&((page)->flags), flag, __ATOMIC_RELEASE)
-#define page_flag_clear(page, flag) __atomic_and_fetch(&((page)->flags), ~(flag), __ATOMIC_RELEASE)
-
-#define page_get_status_flags(page) page_flag_check(page, PGC_PAGE_HOT | PGC_PAGE_DIRTY | PGC_PAGE_CLEAN)
-#define is_page_hot(page) (page_get_status_flags(page) == PGC_PAGE_HOT)
-#define is_page_dirty(page) (page_get_status_flags(page) == PGC_PAGE_DIRTY)
-#define is_page_clean(page) (page_get_status_flags(page) == PGC_PAGE_CLEAN)
-
-struct pgc_page {
- // indexing data
- Word_t section;
- Word_t metric_id;
- time_t start_time_s;
- time_t end_time_s;
- uint32_t update_every_s;
- uint32_t assumed_size;
-
- REFCOUNT refcount;
- uint16_t accesses; // counts the number of accesses on this page
- PGC_PAGE_FLAGS flags;
- SPINLOCK transition_spinlock; // when the page changes between HOT, DIRTY, CLEAN, we have to get this lock
-
- struct {
- struct pgc_page *next;
- struct pgc_page *prev;
- } link;
-
- void *data;
- uint8_t custom_data[];
-
- // IMPORTANT!
- // THIS STRUCTURE NEEDS TO BE INITIALIZED BY HAND!
-};
-
-struct pgc_linked_list {
- SPINLOCK spinlock;
- union {
- PGC_PAGE *base;
- Pvoid_t sections_judy;
- };
- PGC_PAGE_FLAGS flags;
- size_t version;
- size_t last_version_checked;
- bool linked_list_in_sections_judy; // when true, we use 'sections_judy', otherwise we use 'base'
- struct pgc_queue_statistics *stats;
-};
-
-struct pgc {
- struct {
- char name[PGC_NAME_MAX + 1];
-
- size_t partitions;
- size_t clean_size;
- size_t max_dirty_pages_per_call;
- size_t max_pages_per_inline_eviction;
- size_t max_skip_pages_per_inline_eviction;
- size_t max_flushes_inline;
- size_t max_workers_evict_inline;
- size_t additional_bytes_per_page;
- free_clean_page_callback pgc_free_clean_cb;
- save_dirty_page_callback pgc_save_dirty_cb;
- save_dirty_init_callback pgc_save_init_cb;
- PGC_OPTIONS options;
-
- size_t severe_pressure_per1000;
- size_t aggressive_evict_per1000;
- size_t healthy_size_per1000;
- size_t evict_low_threshold_per1000;
-
- dynamic_target_cache_size_callback dynamic_target_size_cb;
- } config;
-
-#ifdef PGC_WITH_ARAL
- ARAL **aral;
-#endif
-
- PGC_CACHE_LINE_PADDING(0);
-
- struct pgc_index {
- RW_SPINLOCK rw_spinlock;
- Pvoid_t sections_judy;
- PGC_CACHE_LINE_PADDING(0);
- } *index;
-
- PGC_CACHE_LINE_PADDING(1);
-
- struct {
- SPINLOCK spinlock;
- size_t per1000;
- } usage;
-
- PGC_CACHE_LINE_PADDING(2);
-
- struct pgc_linked_list clean; // LRU is applied here to free memory from the cache
-
- PGC_CACHE_LINE_PADDING(3);
-
- struct pgc_linked_list dirty; // in the dirty list, pages are ordered the way they were marked dirty
-
- PGC_CACHE_LINE_PADDING(4);
-
- struct pgc_linked_list hot; // in the hot list, pages are order the way they were marked hot
-
- PGC_CACHE_LINE_PADDING(5);
-
- struct pgc_statistics stats; // statistics
-
-#ifdef NETDATA_PGC_POINTER_CHECK
- PGC_CACHE_LINE_PADDING(6);
- netdata_mutex_t global_pointer_registry_mutex;
- Pvoid_t global_pointer_registry;
-#endif
-};
-
-
-
-// ----------------------------------------------------------------------------
-// validate each pointer is indexed once - internal checks only
-
-static inline void pointer_index_init(PGC *cache __maybe_unused) {
-#ifdef NETDATA_PGC_POINTER_CHECK
- netdata_mutex_init(&cache->global_pointer_registry_mutex);
-#else
- ;
-#endif
-}
-
-static inline void pointer_destroy_index(PGC *cache __maybe_unused) {
-#ifdef NETDATA_PGC_POINTER_CHECK
- netdata_mutex_lock(&cache->global_pointer_registry_mutex);
- JudyHSFreeArray(&cache->global_pointer_registry, PJE0);
- netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
-#else
- ;
-#endif
-}
-static inline void pointer_add(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
-#ifdef NETDATA_PGC_POINTER_CHECK
- netdata_mutex_lock(&cache->global_pointer_registry_mutex);
- Pvoid_t *PValue = JudyHSIns(&cache->global_pointer_registry, &page, sizeof(void *), PJE0);
- if(*PValue != NULL)
- fatal("pointer already exists in registry");
- *PValue = page;
- netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
-#else
- ;
-#endif
-}
-
-static inline void pointer_check(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
-#ifdef NETDATA_PGC_POINTER_CHECK
- netdata_mutex_lock(&cache->global_pointer_registry_mutex);
- Pvoid_t *PValue = JudyHSGet(cache->global_pointer_registry, &page, sizeof(void *));
- if(PValue == NULL)
- fatal("pointer is not found in registry");
- netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
-#else
- ;
-#endif
-}
-
-static inline void pointer_del(PGC *cache __maybe_unused, PGC_PAGE *page __maybe_unused) {
-#ifdef NETDATA_PGC_POINTER_CHECK
- netdata_mutex_lock(&cache->global_pointer_registry_mutex);
- int ret = JudyHSDel(&cache->global_pointer_registry, &page, sizeof(void *), PJE0);
- if(!ret)
- fatal("pointer to be deleted does not exist in registry");
- netdata_mutex_unlock(&cache->global_pointer_registry_mutex);
-#else
- ;
-#endif
-}
-
-// ----------------------------------------------------------------------------
-// locking
-
-static inline size_t pgc_indexing_partition(PGC *cache, Word_t metric_id) {
- static __thread Word_t last_metric_id = 0;
- static __thread size_t last_partition = 0;
-
- if(metric_id == last_metric_id || cache->config.partitions == 1)
- return last_partition;
-
- last_metric_id = metric_id;
- last_partition = indexing_partition(metric_id, cache->config.partitions);
-
- return last_partition;
-}
-
-static inline void pgc_index_read_lock(PGC *cache, size_t partition) {
- rw_spinlock_read_lock(&cache->index[partition].rw_spinlock);
-}
-static inline void pgc_index_read_unlock(PGC *cache, size_t partition) {
- rw_spinlock_read_unlock(&cache->index[partition].rw_spinlock);
-}
-static inline void pgc_index_write_lock(PGC *cache, size_t partition) {
- rw_spinlock_write_lock(&cache->index[partition].rw_spinlock);
-}
-static inline void pgc_index_write_unlock(PGC *cache, size_t partition) {
- rw_spinlock_write_unlock(&cache->index[partition].rw_spinlock);
-}
-
-static inline bool pgc_ll_trylock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
- return spinlock_trylock(&ll->spinlock);
-}
-
-static inline void pgc_ll_lock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
- spinlock_lock(&ll->spinlock);
-}
-
-static inline void pgc_ll_unlock(PGC *cache __maybe_unused, struct pgc_linked_list *ll) {
- spinlock_unlock(&ll->spinlock);
-}
-
-static inline bool page_transition_trylock(PGC *cache __maybe_unused, PGC_PAGE *page) {
- return spinlock_trylock(&page->transition_spinlock);
-}
-
-static inline void page_transition_lock(PGC *cache __maybe_unused, PGC_PAGE *page) {
- spinlock_lock(&page->transition_spinlock);
-}
-
-static inline void page_transition_unlock(PGC *cache __maybe_unused, PGC_PAGE *page) {
- spinlock_unlock(&page->transition_spinlock);
-}
-
-// ----------------------------------------------------------------------------
-// evictions control
-
-static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) {
-
- if(size_to_evict)
- spinlock_lock(&cache->usage.spinlock);
-
- else if(!spinlock_trylock(&cache->usage.spinlock))
- return __atomic_load_n(&cache->usage.per1000, __ATOMIC_RELAXED);
-
- size_t current_cache_size;
- size_t wanted_cache_size;
- size_t per1000;
-
- size_t dirty = __atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED);
- size_t hot = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED);
-
- if(cache->config.options & PGC_OPTIONS_AUTOSCALE) {
- size_t dirty_max = __atomic_load_n(&cache->dirty.stats->max_size, __ATOMIC_RELAXED);
- size_t hot_max = __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED);
-
- // our promise to users
- size_t max_size1 = MAX(hot_max, hot) * 2;
-
- // protection against slow flushing
- size_t max_size2 = hot_max + ((dirty_max < hot_max / 2) ? hot_max / 2 : dirty_max * 2);
-
- // the final wanted cache size
- wanted_cache_size = MIN(max_size1, max_size2);
-
- if(cache->config.dynamic_target_size_cb) {
- size_t wanted_cache_size_cb = cache->config.dynamic_target_size_cb();
- if(wanted_cache_size_cb > wanted_cache_size)
- wanted_cache_size = wanted_cache_size_cb;
- }
-
- if (wanted_cache_size < hot + dirty + cache->config.clean_size)
- wanted_cache_size = hot + dirty + cache->config.clean_size;
- }
- else
- wanted_cache_size = hot + dirty + cache->config.clean_size;
-
- // protection again huge queries
- // if huge queries are running, or huge amounts need to be saved
- // allow the cache to grow more (hot pages in main cache are also referenced)
- size_t referenced_size = __atomic_load_n(&cache->stats.referenced_size, __ATOMIC_RELAXED);
- if(unlikely(wanted_cache_size < referenced_size * 2 / 3))
- wanted_cache_size = referenced_size * 2 / 3;
-
- current_cache_size = __atomic_load_n(&cache->stats.size, __ATOMIC_RELAXED); // + pgc_aral_overhead();
-
- per1000 = (size_t)((unsigned long long)current_cache_size * 1000ULL / (unsigned long long)wanted_cache_size);
-
- __atomic_store_n(&cache->usage.per1000, per1000, __ATOMIC_RELAXED);
- __atomic_store_n(&cache->stats.wanted_cache_size, wanted_cache_size, __ATOMIC_RELAXED);
- __atomic_store_n(&cache->stats.current_cache_size, current_cache_size, __ATOMIC_RELAXED);
-
- spinlock_unlock(&cache->usage.spinlock);
-
- if(size_to_evict) {
- size_t target = (size_t)((unsigned long long)wanted_cache_size * (unsigned long long)cache->config.evict_low_threshold_per1000 / 1000ULL);
- if(current_cache_size > target)
- *size_to_evict = current_cache_size - target;
- else
- *size_to_evict = 0;
- }
-
- if(per1000 >= cache->config.severe_pressure_per1000)
- __atomic_add_fetch(&cache->stats.events_cache_under_severe_pressure, 1, __ATOMIC_RELAXED);
-
- else if(per1000 >= cache->config.aggressive_evict_per1000)
- __atomic_add_fetch(&cache->stats.events_cache_needs_space_aggressively, 1, __ATOMIC_RELAXED);
-
- return per1000;
-}
-
-static inline bool cache_pressure(PGC *cache, size_t limit) {
- return (cache_usage_per1000(cache, NULL) >= limit);
-}
-
-#define cache_under_severe_pressure(cache) cache_pressure(cache, (cache)->config.severe_pressure_per1000)
-#define cache_needs_space_aggressively(cache) cache_pressure(cache, (cache)->config.aggressive_evict_per1000)
-#define cache_above_healthy_limit(cache) cache_pressure(cache, (cache)->config.healthy_size_per1000)
-
-typedef bool (*evict_filter)(PGC_PAGE *page, void *data);
-static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evict, bool wait, bool all_of_them, evict_filter filter, void *data);
-#define evict_pages(cache, max_skip, max_evict, wait, all_of_them) evict_pages_with_filter(cache, max_skip, max_evict, wait, all_of_them, NULL, NULL)
-
-static inline void evict_on_clean_page_added(PGC *cache __maybe_unused) {
- if((cache->config.options & PGC_OPTIONS_EVICT_PAGES_INLINE) || cache_needs_space_aggressively(cache)) {
- evict_pages(cache,
- cache->config.max_skip_pages_per_inline_eviction,
- cache->config.max_pages_per_inline_eviction,
- false, false);
- }
-}
-
-static inline void evict_on_page_release_when_permitted(PGC *cache __maybe_unused) {
- if ((cache->config.options & PGC_OPTIONS_EVICT_PAGES_INLINE) || cache_under_severe_pressure(cache)) {
- evict_pages(cache,
- cache->config.max_skip_pages_per_inline_eviction,
- cache->config.max_pages_per_inline_eviction,
- false, false);
- }
-}
-
-// ----------------------------------------------------------------------------
-// flushing control
-
-static bool flush_pages(PGC *cache, size_t max_flushes, Word_t section, bool wait, bool all_of_them);
-
-static inline bool flushing_critical(PGC *cache) {
- if(unlikely(__atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED) > __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED))) {
- __atomic_add_fetch(&cache->stats.events_flush_critical, 1, __ATOMIC_RELAXED);
- return true;
- }
-
- return false;
-}
-
-// ----------------------------------------------------------------------------
-// helpers
-
-static inline size_t page_assumed_size(PGC *cache, size_t size) {
- return size + (sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page + sizeof(Word_t) * 3);
-}
-
-static inline size_t page_size_from_assumed_size(PGC *cache, size_t assumed_size) {
- return assumed_size - (sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page + sizeof(Word_t) * 3);
-}
-
-// ----------------------------------------------------------------------------
-// Linked list management
-
-static inline void atomic_set_max(size_t *max, size_t desired) {
- size_t expected;
-
- expected = __atomic_load_n(max, __ATOMIC_RELAXED);
-
- do {
-
- if(expected >= desired)
- return;
-
- } while(!__atomic_compare_exchange_n(max, &expected, desired,
- false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
-}
-
-struct section_pages {
- SPINLOCK migration_to_v2_spinlock;
- size_t entries;
- size_t size;
- PGC_PAGE *base;
-};
-
-static ARAL *pgc_section_pages_aral = NULL;
-static void pgc_section_pages_static_aral_init(void) {
- static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;
-
- if(unlikely(!pgc_section_pages_aral)) {
- spinlock_lock(&spinlock);
-
- // we have to check again
- if(!pgc_section_pages_aral)
- pgc_section_pages_aral = aral_create(
- "pgc_section",
- sizeof(struct section_pages),
- 0,
- 65536, NULL,
- NULL, NULL, false, false);
-
- spinlock_unlock(&spinlock);
- }
-}
-
-static inline void pgc_stats_ll_judy_change(PGC *cache, struct pgc_linked_list *ll, size_t mem_before_judyl, size_t mem_after_judyl) {
- if(mem_after_judyl > mem_before_judyl) {
- __atomic_add_fetch(&ll->stats->size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
- __atomic_add_fetch(&cache->stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
- }
- else if(mem_after_judyl < mem_before_judyl) {
- __atomic_sub_fetch(&ll->stats->size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
- __atomic_sub_fetch(&cache->stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
- }
-}
-
-static inline void pgc_stats_index_judy_change(PGC *cache, size_t mem_before_judyl, size_t mem_after_judyl) {
- if(mem_after_judyl > mem_before_judyl) {
- __atomic_add_fetch(&cache->stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
- }
- else if(mem_after_judyl < mem_before_judyl) {
- __atomic_sub_fetch(&cache->stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
- }
-}
-
// Add 'page' to queue 'll' and mark it with the queue's status flag.
// The page must currently carry no status flag (checked below).
// If 'having_lock' is true, the caller already holds the queue's lock.
static void pgc_ll_add(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PGC_PAGE *page, bool having_lock) {
    if(!having_lock)
        pgc_ll_lock(cache, ll);

    internal_fatal(page_get_status_flags(page) != 0,
                   "DBENGINE CACHE: invalid page flags, the page has %d, but it is should be %d",
                   page_get_status_flags(page),
                   0);

    if(ll->linked_list_in_sections_judy) {
        // this queue groups its pages per section, in a JudyL keyed by section id
        size_t mem_before_judyl, mem_after_judyl;

        mem_before_judyl = JudyLMemUsed(ll->sections_judy);
        Pvoid_t *section_pages_pptr = JudyLIns(&ll->sections_judy, page->section, PJE0);
        mem_after_judyl = JudyLMemUsed(ll->sections_judy);

        struct section_pages *sp = *section_pages_pptr;
        if(!sp) {
            // first page of this section - allocate and zero its list head
            // sp = callocz(1, sizeof(struct section_pages));
            sp = aral_mallocz(pgc_section_pages_aral);
            memset(sp, 0, sizeof(struct section_pages));

            *section_pages_pptr = sp;

            // charge the allocation together with the judy growth
            mem_after_judyl += sizeof(struct section_pages);
        }
        pgc_stats_ll_judy_change(cache, ll, mem_before_judyl, mem_after_judyl);

        sp->entries++;
        sp->size += page->assumed_size;
        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(sp->base, page, link.prev, link.next);

        // bump the queue version periodically, so observers notice growth
        if((sp->entries % cache->config.max_dirty_pages_per_call) == 0)
            ll->version++;
    }
    else {
        // CLEAN pages end up here.
        // - New pages created as CLEAN, always have 1 access.
        // - DIRTY pages made CLEAN, depending on their accesses may be appended (accesses > 0) or prepended (accesses = 0).

        if(page->accesses || page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED) {
            DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
            page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
        }
        else
            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);

        ll->version++;
    }

    // the page is linked first, and only then flagged as a member of this queue
    page_flag_set(page, ll->flags);

    if(!having_lock)
        pgc_ll_unlock(cache, ll);

    // statistics are atomic counters, updated outside the queue lock
    size_t entries = __atomic_add_fetch(&ll->stats->entries, 1, __ATOMIC_RELAXED);
    size_t size    = __atomic_add_fetch(&ll->stats->size, page->assumed_size, __ATOMIC_RELAXED);
    __atomic_add_fetch(&ll->stats->added_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&ll->stats->added_size, page->assumed_size, __ATOMIC_RELAXED);

    atomic_set_max(&ll->stats->max_entries, entries);
    atomic_set_max(&ll->stats->max_size, size);
}
-
// Remove 'page' from queue 'll' and clear the queue's status flag from it.
// The page must currently be flagged as a member of exactly this queue.
// If 'having_lock' is true, the caller already holds the queue's lock.
static void pgc_ll_del(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PGC_PAGE *page, bool having_lock) {
    // statistics are atomic counters, safe to update before taking the lock
    __atomic_sub_fetch(&ll->stats->entries, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&ll->stats->size, page->assumed_size, __ATOMIC_RELAXED);
    __atomic_add_fetch(&ll->stats->removed_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&ll->stats->removed_size, page->assumed_size, __ATOMIC_RELAXED);

    if(!having_lock)
        pgc_ll_lock(cache, ll);

    internal_fatal(page_get_status_flags(page) != ll->flags,
                   "DBENGINE CACHE: invalid page flags, the page has %d, but it is should be %d",
                   page_get_status_flags(page),
                   ll->flags);

    page_flag_clear(page, ll->flags);

    if(ll->linked_list_in_sections_judy) {
        // per-section queue: unlink from the section's list
        Pvoid_t *section_pages_pptr = JudyLGet(ll->sections_judy, page->section, PJE0);
        internal_fatal(!section_pages_pptr, "DBENGINE CACHE: page should be in Judy LL, but it is not");

        struct section_pages *sp = *section_pages_pptr;
        sp->entries--;
        sp->size -= page->assumed_size;
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(sp->base, page, link.prev, link.next);

        if(!sp->base) {
            // that was the section's last page - drop the section entry
            size_t mem_before_judyl, mem_after_judyl;

            mem_before_judyl = JudyLMemUsed(ll->sections_judy);
            int rc = JudyLDel(&ll->sections_judy, page->section, PJE0);
            mem_after_judyl = JudyLMemUsed(ll->sections_judy);

            if(!rc)
                fatal("DBENGINE CACHE: cannot delete section from Judy LL");

            // freez(sp);
            aral_freez(pgc_section_pages_aral, sp);
            // credit the freed list head together with the judy shrink
            mem_after_judyl -= sizeof(struct section_pages);
            pgc_stats_ll_judy_change(cache, ll, mem_before_judyl, mem_after_judyl);
        }
    }
    else {
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
        ll->version++;
    }

    if(!having_lock)
        pgc_ll_unlock(cache, ll);
}
-
// Record an access on 'page'. For CLEAN pages, try to move the page to the
// tail of the clean queue (making it the last eviction candidate); when the
// clean lock cannot be acquired without blocking, just flag the page so the
// move happens lazily (at eviction scan or re-queue time).
static inline void page_has_been_accessed(PGC *cache, PGC_PAGE *page) {
    PGC_PAGE_FLAGS flags = page_flag_check(page, PGC_PAGE_CLEAN | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES);

    if (!(flags & PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES)) {
        __atomic_add_fetch(&page->accesses, 1, __ATOMIC_RELAXED);

        if (flags & PGC_PAGE_CLEAN) {
            if(pgc_ll_trylock(cache, &cache->clean)) {
                // move to the tail of the clean queue (LRU behavior)
                DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
                DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
                pgc_ll_unlock(cache, &cache->clean);
                page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
            }
            else
                // couldn't get the lock without blocking - defer the move
                page_flag_set(page, PGC_PAGE_HAS_BEEN_ACCESSED);
        }
    }
}
-
-
-// ----------------------------------------------------------------------------
-// state transitions
-
// Transition 'page' to the CLEAN state, removing it from the hot/dirty
// queues as needed. The caller may already hold the page's transition lock
// and/or the clean queue lock (indicated by the two boolean flags).
static inline void page_set_clean(PGC *cache, PGC_PAGE *page, bool having_transition_lock, bool having_clean_lock) {
    if(!having_transition_lock)
        page_transition_lock(cache, page);

    PGC_PAGE_FLAGS flags = page_get_status_flags(page);

    if(flags & PGC_PAGE_CLEAN) {
        // already clean - nothing to do
        if(!having_transition_lock)
            page_transition_unlock(cache, page);
        return;
    }

    if(flags & PGC_PAGE_HOT)
        pgc_ll_del(cache, &cache->hot, page, false);

    if(flags & PGC_PAGE_DIRTY)
        pgc_ll_del(cache, &cache->dirty, page, false);

    // first add to linked list, then set the flag (required for move_page_last())
    pgc_ll_add(cache, &cache->clean, page, having_clean_lock);

    if(!having_transition_lock)
        page_transition_unlock(cache, page);
}
-
// Transition 'page' to the DIRTY state.
// Lock ordering: the hot queue lock is taken BEFORE the page transition lock
// (same order as all_hot_to_dirty()), otherwise the two paths could deadlock.
static inline void page_set_dirty(PGC *cache, PGC_PAGE *page, bool having_hot_lock) {
    if(!having_hot_lock)
        // to avoid deadlocks, we have to get the hot lock before the page transition
        // since this is what all_hot_to_dirty() does
        pgc_ll_lock(cache, &cache->hot);

    page_transition_lock(cache, page);

    PGC_PAGE_FLAGS flags = page_get_status_flags(page);

    if(flags & PGC_PAGE_DIRTY) {
        // already dirty - nothing to do
        page_transition_unlock(cache, page);

        if(!having_hot_lock)
            // we don't need the hot lock anymore
            pgc_ll_unlock(cache, &cache->hot);

        return;
    }

    // track the page as "in transit" hot->dirty while it is in neither queue
    __atomic_add_fetch(&cache->stats.hot2dirty_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&cache->stats.hot2dirty_size, page->assumed_size, __ATOMIC_RELAXED);

    if(likely(flags & PGC_PAGE_HOT))
        pgc_ll_del(cache, &cache->hot, page, true);

    if(!having_hot_lock)
        // we don't need the hot lock anymore
        pgc_ll_unlock(cache, &cache->hot);

    if(unlikely(flags & PGC_PAGE_CLEAN))
        pgc_ll_del(cache, &cache->clean, page, false);

    // first add to linked list, then set the flag (required for move_page_last())
    pgc_ll_add(cache, &cache->dirty, page, false);

    __atomic_sub_fetch(&cache->stats.hot2dirty_entries, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&cache->stats.hot2dirty_size, page->assumed_size, __ATOMIC_RELAXED);

    page_transition_unlock(cache, page);
}
-
// Transition 'page' to the HOT state, removing it from the dirty/clean
// queues as needed. No-op if the page is already hot.
static inline void page_set_hot(PGC *cache, PGC_PAGE *page) {
    page_transition_lock(cache, page);

    PGC_PAGE_FLAGS flags = page_get_status_flags(page);

    if(flags & PGC_PAGE_HOT) {
        page_transition_unlock(cache, page);
        return;
    }

    if(flags & PGC_PAGE_DIRTY)
        pgc_ll_del(cache, &cache->dirty, page, false);

    if(flags & PGC_PAGE_CLEAN)
        pgc_ll_del(cache, &cache->clean, page, false);

    // first add to linked list, then set the flag (required for move_page_last())
    pgc_ll_add(cache, &cache->hot, page, false);

    page_transition_unlock(cache, page);
}
-
-
-// ----------------------------------------------------------------------------
-// Referencing
-
// Number of pages currently holding at least one reference.
static inline size_t PGC_REFERENCED_PAGES(PGC *cache) {
    return __atomic_load_n(&cache->stats.referenced_entries, __ATOMIC_RELAXED);
}

// Called on a refcount 0 -> 1 transition: account the page as referenced.
static inline void PGC_REFERENCED_PAGES_PLUS1(PGC *cache, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.referenced_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&cache->stats.referenced_size, page->assumed_size, __ATOMIC_RELAXED);
}

// Called when the last reference is dropped or the page is taken for
// deletion; 'assumed_size' is captured by the caller beforehand, because
// the page itself may already be unsafe to touch at this point.
static inline void PGC_REFERENCED_PAGES_MINUS1(PGC *cache, size_t assumed_size) {
    __atomic_sub_fetch(&cache->stats.referenced_entries, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&cache->stats.referenced_size, assumed_size, __ATOMIC_RELAXED);
}
-
-// If the page is not already acquired,
-// YOU HAVE TO HAVE THE QUEUE (hot, dirty, clean) THE PAGE IS IN, L O C K E D !
-// If you don't have it locked, NOTHING PREVENTS THIS PAGE FOR VANISHING WHILE THIS IS CALLED!
// Grab a reference on 'page' with a CAS loop.
// Returns false when the page has been claimed for deletion (refcount < 0)
// and must not be used. See the warning above: unless the caller already
// holds a reference, the queue containing the page must be locked.
static inline bool page_acquire(PGC *cache, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.acquires, 1, __ATOMIC_RELAXED);

    REFCOUNT expected, desired;

    expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
    size_t spins = 0;

    do {
        spins++;

        // a negative refcount means a deleter won the race - back off
        if(unlikely(expected < 0))
            return false;

        desired = expected + 1;

    } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));

    if(unlikely(spins > 1))
        __atomic_add_fetch(&cache->stats.acquire_spins, spins - 1, __ATOMIC_RELAXED);

    // 0 -> 1 transition: the page just became referenced
    if(desired == 1)
        PGC_REFERENCED_PAGES_PLUS1(cache, page);

    return true;
}
-
// Drop one reference from 'page'. When the last reference goes away, the
// referenced-pages accounting is updated and, if requested, an eviction
// pass may be triggered.
static inline void page_release(PGC *cache, PGC_PAGE *page, bool evict_if_necessary) {
    __atomic_add_fetch(&cache->stats.releases, 1, __ATOMIC_RELAXED);

    size_t assumed_size = page->assumed_size; // take the size before we release it
    REFCOUNT expected, desired;

    expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);

    size_t spins = 0;
    do {
        spins++;

        internal_fatal(expected <= 0,
                       "DBENGINE CACHE: trying to release a page with reference counter %d", expected);

        desired = expected - 1;

    } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));

    if(unlikely(spins > 1))
        __atomic_add_fetch(&cache->stats.release_spins, spins - 1, __ATOMIC_RELAXED);

    if(desired == 0) {
        // that was the last reference
        PGC_REFERENCED_PAGES_MINUS1(cache, assumed_size);

        if(evict_if_necessary)
            evict_on_page_release_when_permitted(cache);
    }
}
-
// Try to claim an unreferenced CLEAN page for deletion, while the caller
// holds the clean queue lock. Succeeds only when the refcount is exactly 0,
// in which case it is CAS'ed to REFCOUNT_DELETING so no one else can acquire
// the page. Returns true when the caller now owns the page for deletion.
static inline bool non_acquired_page_get_for_deletion___while_having_clean_locked(PGC *cache __maybe_unused, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.acquires_for_deletion, 1, __ATOMIC_RELAXED);

    internal_fatal(!is_page_clean(page),
                   "DBENGINE CACHE: only clean pages can be deleted");

    REFCOUNT expected, desired;

    expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
    size_t spins = 0;
    bool delete_it;

    do {
        spins++;

        if (expected == 0) {
            // no one holds it - try to mark it as being deleted
            desired = REFCOUNT_DELETING;
            delete_it = true;
        }
        else {
            // someone holds a reference - cannot delete
            delete_it = false;
            break;
        }

    } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));

    if(delete_it) {
        // we can delete this page
        internal_fatal(page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
                       "DBENGINE CACHE: page is already being deleted");

        page_flag_set(page, PGC_PAGE_IS_BEING_DELETED);
    }

    if(unlikely(spins > 1))
        __atomic_add_fetch(&cache->stats.delete_spins, spins - 1, __ATOMIC_RELAXED);

    return delete_it;
}
-
// The caller holds one reference on 'page'. Atomically either:
// - claim the page for deletion (refcount 1 -> REFCOUNT_DELETING) when the
//   caller is its only holder, returning true, or
// - just release the caller's reference (refcount - 1), returning false.
static inline bool acquired_page_get_for_deletion_or_release_it(PGC *cache __maybe_unused, PGC_PAGE *page) {
    __atomic_add_fetch(&cache->stats.acquires_for_deletion, 1, __ATOMIC_RELAXED);

    size_t assumed_size = page->assumed_size; // take the size before we release it

    REFCOUNT expected, desired;

    expected = __atomic_load_n(&page->refcount, __ATOMIC_RELAXED);
    size_t spins = 0;
    bool delete_it;

    do {
        spins++;

        internal_fatal(expected < 1,
                       "DBENGINE CACHE: page to be deleted should be acquired by the caller.");

        if (expected == 1) {
            // we are the only one having this page referenced
            desired = REFCOUNT_DELETING;
            delete_it = true;
        }
        else {
            // this page cannot be deleted
            desired = expected - 1;
            delete_it = false;
        }

    } while(!__atomic_compare_exchange_n(&page->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED));

    if(delete_it) {
        // the page is no longer counted as referenced
        PGC_REFERENCED_PAGES_MINUS1(cache, assumed_size);

        // we can delete this page
        internal_fatal(page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
                       "DBENGINE CACHE: page is already being deleted");

        page_flag_set(page, PGC_PAGE_IS_BEING_DELETED);
    }

    if(unlikely(spins > 1))
        __atomic_add_fetch(&cache->stats.delete_spins, spins - 1, __ATOMIC_RELAXED);

    return delete_it;
}
-
-
-// ----------------------------------------------------------------------------
-// Indexing
-
// Final disposal of a page that has been removed from all queues and the
// index: notify the owner via the free callback, update the cache-wide
// statistics, and release the PGC_PAGE structure itself.
static inline void free_this_page(PGC *cache, PGC_PAGE *page, size_t partition __maybe_unused) {
    // call the callback to free the user supplied memory
    cache->config.pgc_free_clean_cb(cache, (PGC_ENTRY){
        .section = page->section,
        .metric_id = page->metric_id,
        .start_time_s = page->start_time_s,
        .end_time_s = __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
        .update_every_s = page->update_every_s,
        .size = page_size_from_assumed_size(cache, page->assumed_size),
        .hot = (is_page_hot(page)) ? true : false,
        .data = page->data,
        .custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL,
    });

    // update statistics
    __atomic_add_fetch(&cache->stats.removed_entries, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&cache->stats.removed_size, page->assumed_size, __ATOMIC_RELAXED);

    __atomic_sub_fetch(&cache->stats.entries, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&cache->stats.size, page->assumed_size, __ATOMIC_RELAXED);

    // free our memory
#ifdef PGC_WITH_ARAL
    aral_freez(cache->aral[partition], page);
#else
    freez(page);
#endif
}
-
// Remove 'page' from the 3-level index (section -> metric -> start_time),
// pruning any judy levels that become empty, and adjust the index memory
// accounting. "unsafe": the caller must hold the partition's write lock,
// the page must be in no queue, and it must be marked for deletion.
static void remove_this_page_from_index_unsafe(PGC *cache, PGC_PAGE *page, size_t partition) {
    // remove it from the Judy arrays

    pointer_check(cache, page);

    internal_fatal(page_flag_check(page, PGC_PAGE_HOT | PGC_PAGE_DIRTY | PGC_PAGE_CLEAN),
                   "DBENGINE CACHE: page to be removed from the cache is still in the linked-list");

    internal_fatal(!page_flag_check(page, PGC_PAGE_IS_BEING_DELETED),
                   "DBENGINE CACHE: page to be removed from the index, is not marked for deletion");

    internal_fatal(partition != pgc_indexing_partition(cache, page->metric_id),
                   "DBENGINE CACHE: attempted to remove this page from the wrong partition of the cache");

    // level 1: section
    Pvoid_t *metrics_judy_pptr = JudyLGet(cache->index[partition].sections_judy, page->section, PJE0);
    if(unlikely(!metrics_judy_pptr))
        fatal("DBENGINE CACHE: section '%lu' should exist, but it does not.", page->section);

    // level 2: metric within the section
    Pvoid_t *pages_judy_pptr = JudyLGet(*metrics_judy_pptr, page->metric_id, PJE0);
    if(unlikely(!pages_judy_pptr))
        fatal("DBENGINE CACHE: metric '%lu' in section '%lu' should exist, but it does not.",
              page->metric_id, page->section);

    // level 3: page by start time
    Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, page->start_time_s, PJE0);
    if(unlikely(!page_ptr))
        fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' should exist, but it does not.",
              page->start_time_s, page->metric_id, page->section);

    PGC_PAGE *found_page = *page_ptr;
    if(unlikely(found_page != page))
        fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' should exist, but the index returned a different address.",
              page->start_time_s, page->metric_id, page->section);

    // delete bottom-up, pruning each level that becomes empty,
    // while accumulating the judy memory deltas for the stats
    size_t mem_before_judyl = 0, mem_after_judyl = 0;

    mem_before_judyl += JudyLMemUsed(*pages_judy_pptr);
    if(unlikely(!JudyLDel(pages_judy_pptr, page->start_time_s, PJE0)))
        fatal("DBENGINE CACHE: page with start time '%ld' of metric '%lu' in section '%lu' exists, but cannot be deleted.",
              page->start_time_s, page->metric_id, page->section);
    mem_after_judyl += JudyLMemUsed(*pages_judy_pptr);

    mem_before_judyl += JudyLMemUsed(*metrics_judy_pptr);
    if(!*pages_judy_pptr && !JudyLDel(metrics_judy_pptr, page->metric_id, PJE0))
        fatal("DBENGINE CACHE: metric '%lu' in section '%lu' exists and is empty, but cannot be deleted.",
              page->metric_id, page->section);
    mem_after_judyl += JudyLMemUsed(*metrics_judy_pptr);

    mem_before_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
    if(!*metrics_judy_pptr && !JudyLDel(&cache->index[partition].sections_judy, page->section, PJE0))
        fatal("DBENGINE CACHE: section '%lu' exists and is empty, but cannot be deleted.", page->section);
    mem_after_judyl += JudyLMemUsed(cache->index[partition].sections_judy);

    pgc_stats_index_judy_change(cache, mem_before_judyl, mem_after_judyl);

    pointer_del(cache, page);
}
-
// Deindex and free a page that is already out of every queue and has been
// claimed for deletion: take the partition's write lock just long enough to
// remove it from the index, then free it outside the lock.
static inline void remove_and_free_page_not_in_any_queue_and_acquired_for_deletion(PGC *cache, PGC_PAGE *page) {
    size_t partition = pgc_indexing_partition(cache, page->metric_id);
    pgc_index_write_lock(cache, partition);
    remove_this_page_from_index_unsafe(cache, page, partition);
    pgc_index_write_unlock(cache, partition);
    free_this_page(cache, page, partition);
}
-
// The caller holds a reference on 'page'. Force it CLEAN and, if the caller
// turns out to be its only holder, evict it immediately (deindex + free) and
// return true. Otherwise just release the caller's reference, leaving the
// now-clean page behind, and return false.
static inline bool make_acquired_page_clean_and_evict_or_page_release(PGC *cache, PGC_PAGE *page) {
    pointer_check(cache, page);

    page_transition_lock(cache, page);
    pgc_ll_lock(cache, &cache->clean);

    // make it clean - it does not have any accesses, so it will be prepended
    page_set_clean(cache, page, true, true);

    if(!acquired_page_get_for_deletion_or_release_it(cache, page)) {
        // others still reference it - our reference has been dropped
        pgc_ll_unlock(cache, &cache->clean);
        page_transition_unlock(cache, page);
        return false;
    }

    // remove it from the linked list
    pgc_ll_del(cache, &cache->clean, page, true);
    pgc_ll_unlock(cache, &cache->clean);
    page_transition_unlock(cache, page);

    remove_and_free_page_not_in_any_queue_and_acquired_for_deletion(cache, page);

    return true;
}
-
-// returns true, when there is more work to do
-static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evict, bool wait, bool all_of_them, evict_filter filter, void *data) {
- size_t per1000 = cache_usage_per1000(cache, NULL);
-
- if(!all_of_them && per1000 < cache->config.healthy_size_per1000)
- // don't bother - not enough to do anything
- return false;
-
- size_t workers_running = __atomic_add_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
- if(!wait && !all_of_them && workers_running > cache->config.max_workers_evict_inline && per1000 < cache->config.severe_pressure_per1000) {
- __atomic_sub_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
- return false;
- }
-
- internal_fatal(cache->clean.linked_list_in_sections_judy,
- "wrong clean pages configuration - clean pages need to have a linked list, not a judy array");
-
- if(unlikely(!max_skip))
- max_skip = SIZE_MAX;
- else if(unlikely(max_skip < 2))
- max_skip = 2;
-
- if(unlikely(!max_evict))
- max_evict = SIZE_MAX;
- else if(unlikely(max_evict < 2))
- max_evict = 2;
-
- size_t total_pages_evicted = 0;
- size_t total_pages_skipped = 0;
- bool stopped_before_finishing = false;
- size_t spins = 0;
-
- do {
- if(++spins > 1)
- __atomic_add_fetch(&cache->stats.evict_spins, 1, __ATOMIC_RELAXED);
-
- bool batch;
- size_t max_size_to_evict = 0;
- if (unlikely(all_of_them)) {
- max_size_to_evict = SIZE_MAX;
- batch = true;
- }
- else if(unlikely(wait)) {
- per1000 = cache_usage_per1000(cache, &max_size_to_evict);
- batch = (wait && per1000 > cache->config.severe_pressure_per1000) ? true : false;
- }
- else {
- batch = false;
- max_size_to_evict = (cache_above_healthy_limit(cache)) ? 1 : 0;
- }
-
- if (!max_size_to_evict)
- break;
-
- // check if we have to stop
- if(total_pages_evicted >= max_evict && !all_of_them) {
- stopped_before_finishing = true;
- break;
- }
-
- if(!all_of_them && !wait) {
- if(!pgc_ll_trylock(cache, &cache->clean)) {
- stopped_before_finishing = true;
- goto premature_exit;
- }
-
- // at this point we have the clean lock
- }
- else
- pgc_ll_lock(cache, &cache->clean);
-
- // find a page to evict
- PGC_PAGE *pages_to_evict = NULL;
- size_t pages_to_evict_size = 0;
- for(PGC_PAGE *page = cache->clean.base, *next = NULL, *first_page_we_relocated = NULL; page ; page = next) {
- next = page->link.next;
-
- if(unlikely(page == first_page_we_relocated))
- // we did a complete loop on all pages
- break;
-
- if(unlikely(page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED)) {
- DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
- DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
- page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
- continue;
- }
-
- if(unlikely(filter && !filter(page, data)))
- continue;
-
- if(non_acquired_page_get_for_deletion___while_having_clean_locked(cache, page)) {
- // we can delete this page
-
- // remove it from the clean list
- pgc_ll_del(cache, &cache->clean, page, true);
-
- __atomic_add_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
- __atomic_add_fetch(&cache->stats.evicting_size, page->assumed_size, __ATOMIC_RELAXED);
-
- DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);
-
- pages_to_evict_size += page->assumed_size;
-
- if(unlikely(all_of_them || (batch && pages_to_evict_size < max_size_to_evict)))
- // get more pages
- ;
- else
- // one page at a time
- break;
- }
- else {
- // we can't delete this page
-
- if(!first_page_we_relocated)
- first_page_we_relocated = page;
-
- DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
- DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
-
- // check if we have to stop
- if(unlikely(++total_pages_skipped >= max_skip && !all_of_them)) {
- stopped_before_finishing = true;
- break;
- }
- }
- }
- pgc_ll_unlock(cache, &cache->clean);
-
- if(likely(pages_to_evict)) {
- // remove them from the index
-
- if(unlikely(pages_to_evict->link.next)) {
- // we have many pages, let's minimize the index locks we are going to get
-
- PGC_PAGE *pages_per_partition[cache->config.partitions];
- memset(pages_per_partition, 0, sizeof(PGC_PAGE *) * cache->config.partitions);
-
- // sort them by partition
- for (PGC_PAGE *page = pages_to_evict, *next = NULL; page; page = next) {
- next = page->link.next;
-
- size_t partition = pgc_indexing_partition(cache, page->metric_id);
- DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);
- DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_per_partition[partition], page, link.prev, link.next);
- }
-
- // remove them from the index
- for (size_t partition = 0; partition < cache->config.partitions; partition++) {
- if (!pages_per_partition[partition]) continue;
-
- pgc_index_write_lock(cache, partition);
-
- for (PGC_PAGE *page = pages_per_partition[partition]; page; page = page->link.next)
- remove_this_page_from_index_unsafe(cache, page, partition);
-
- pgc_index_write_unlock(cache, partition);
- }
-
- // free them
- for (size_t partition = 0; partition < cache->config.partitions; partition++) {
- if (!pages_per_partition[partition]) continue;
-
- for (PGC_PAGE *page = pages_per_partition[partition], *next = NULL; page; page = next) {
- next = page->link.next;
-
- size_t page_size = page->assumed_size;
- free_this_page(cache, page, partition);
-
- __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
- __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED);
-
- total_pages_evicted++;
- }
- }
- }
- else {
- // just one page to be evicted
- PGC_PAGE *page = pages_to_evict;
-
- size_t page_size = page->assumed_size;
-
- size_t partition = pgc_indexing_partition(cache, page->metric_id);
- pgc_index_write_lock(cache, partition);
- remove_this_page_from_index_unsafe(cache, page, partition);
- pgc_index_write_unlock(cache, partition);
- free_this_page(cache, page, partition);
-
- __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
- __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED);
-
- total_pages_evicted++;
- }
- }
- else
- break;
-
- } while(all_of_them || (total_pages_evicted < max_evict && total_pages_skipped < max_skip));
-
- if(all_of_them && !filter) {
- pgc_ll_lock(cache, &cache->clean);
- if(cache->clean.stats->entries) {
- nd_log_limit_static_global_var(erl, 1, 0);
- nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
- "DBENGINE CACHE: cannot free all clean pages, %zu are still in the clean queue",
- cache->clean.stats->entries);
- }
- pgc_ll_unlock(cache, &cache->clean);
- }
-
-premature_exit:
- if(unlikely(total_pages_skipped))
- __atomic_add_fetch(&cache->stats.evict_skipped, total_pages_skipped, __ATOMIC_RELAXED);
-
- __atomic_sub_fetch(&cache->stats.workers_evict, 1, __ATOMIC_RELAXED);
-
- return stopped_before_finishing;
-}
-
-static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) {
- internal_fatal(entry->start_time_s < 0 || entry->end_time_s < 0,
- "DBENGINE CACHE: timestamps are negative");
-
- __atomic_add_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);
-
- size_t partition = pgc_indexing_partition(cache, entry->metric_id);
-
-#ifdef PGC_WITH_ARAL
- PGC_PAGE *allocation = aral_mallocz(cache->aral[partition]);
-#endif
- PGC_PAGE *page;
- size_t spins = 0;
-
- if(unlikely(entry->start_time_s < 0))
- entry->start_time_s = 0;
-
- if(unlikely(entry->end_time_s < 0))
- entry->end_time_s = 0;
-
- do {
- if(++spins > 1)
- __atomic_add_fetch(&cache->stats.insert_spins, 1, __ATOMIC_RELAXED);
-
- pgc_index_write_lock(cache, partition);
-
- size_t mem_before_judyl = 0, mem_after_judyl = 0;
-
- mem_before_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
- Pvoid_t *metrics_judy_pptr = JudyLIns(&cache->index[partition].sections_judy, entry->section, PJE0);
- if(unlikely(!metrics_judy_pptr || metrics_judy_pptr == PJERR))
- fatal("DBENGINE CACHE: corrupted sections judy array");
- mem_after_judyl += JudyLMemUsed(cache->index[partition].sections_judy);
-
- mem_before_judyl += JudyLMemUsed(*metrics_judy_pptr);
- Pvoid_t *pages_judy_pptr = JudyLIns(metrics_judy_pptr, entry->metric_id, PJE0);
- if(unlikely(!pages_judy_pptr || pages_judy_pptr == PJERR))
- fatal("DBENGINE CACHE: corrupted pages judy array");
- mem_after_judyl += JudyLMemUsed(*metrics_judy_pptr);
-
- mem_before_judyl += JudyLMemUsed(*pages_judy_pptr);
- Pvoid_t *page_ptr = JudyLIns(pages_judy_pptr, entry->start_time_s, PJE0);
- if(unlikely(!page_ptr || page_ptr == PJERR))
- fatal("DBENGINE CACHE: corrupted page in judy array");
- mem_after_judyl += JudyLMemUsed(*pages_judy_pptr);
-
- pgc_stats_index_judy_change(cache, mem_before_judyl, mem_after_judyl);
-
- page = *page_ptr;
-
- if (likely(!page)) {
-#ifdef PGC_WITH_ARAL
- page = allocation;
- allocation = NULL;
-#else
- page = mallocz(sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page);
-#endif
- page->refcount = 1;
- page->accesses = (entry->hot) ? 0 : 1;
- page->flags = 0;
- page->section = entry->section;
- page->metric_id = entry->metric_id;
- page->start_time_s = entry->start_time_s;
- page->end_time_s = entry->end_time_s,
- page->update_every_s = entry->update_every_s,
- page->data = entry->data;
- page->assumed_size = page_assumed_size(cache, entry->size);
- spinlock_init(&page->transition_spinlock);
- page->link.prev = NULL;
- page->link.next = NULL;
-
- if(cache->config.additional_bytes_per_page) {
- if(entry->custom_data)
- memcpy(page->custom_data, entry->custom_data, cache->config.additional_bytes_per_page);
- else
- memset(page->custom_data, 0, cache->config.additional_bytes_per_page);
- }
-
- // put it in the index
- *page_ptr = page;
- pointer_add(cache, page);
- pgc_index_write_unlock(cache, partition);
-
- if (entry->hot)
- page_set_hot(cache, page);
- else
- page_set_clean(cache, page, false, false);
-
- PGC_REFERENCED_PAGES_PLUS1(cache, page);
-
- // update statistics
- __atomic_add_fetch(&cache->stats.added_entries, 1, __ATOMIC_RELAXED);
- __atomic_add_fetch(&cache->stats.added_size, page->assumed_size, __ATOMIC_RELAXED);
-
- __atomic_add_fetch(&cache->stats.entries, 1, __ATOMIC_RELAXED);
- __atomic_add_fetch(&cache->stats.size, page->assumed_size, __ATOMIC_RELAXED);
-
- if(added)
- *added = true;
- }
- else {
- if (!page_acquire(cache, page))
- page = NULL;
-
- else if(added)
- *added = false;
-
- pgc_index_write_unlock(cache, partition);
-
- if(unlikely(!page)) {
- // now that we don't have the lock,
- // give it some time for the old page to go away
- struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 };
- nanosleep(&ns, NULL);
- }
- }
-
- } while(!page);
-
-#ifdef PGC_WITH_ARAL
- if(allocation)
- aral_freez(cache->aral[partition], allocation);
-#endif
-
- __atomic_sub_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);
-
- if(!entry->hot)
- evict_on_clean_page_added(cache);
-
- if((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache)) {
- flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL,
- false, false);
- }
-
- return page;
-}
-
// Look up a page of (section, metric_id) by 'start_time_s' under the
// partition's read lock, using the given search method, and return it
// acquired (the caller must release it). Returns NULL on miss or when the
// matching page is being deleted. Hit/miss statistics are recorded per
// search method (exact vs closest).
static PGC_PAGE *page_find_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) {
    __atomic_add_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED);

    size_t *stats_hit_ptr, *stats_miss_ptr;

    if(method == PGC_SEARCH_CLOSEST) {
        __atomic_add_fetch(&cache->stats.searches_closest, 1, __ATOMIC_RELAXED);
        stats_hit_ptr = &cache->stats.searches_closest_hits;
        stats_miss_ptr = &cache->stats.searches_closest_misses;
    }
    else {
        __atomic_add_fetch(&cache->stats.searches_exact, 1, __ATOMIC_RELAXED);
        stats_hit_ptr = &cache->stats.searches_exact_hits;
        stats_miss_ptr = &cache->stats.searches_exact_misses;
    }

    PGC_PAGE *page = NULL;
    size_t partition = pgc_indexing_partition(cache, metric_id);

    pgc_index_read_lock(cache, partition);

    // level 1: section
    Pvoid_t *metrics_judy_pptr = JudyLGet(cache->index[partition].sections_judy, section, PJE0);
    if(unlikely(metrics_judy_pptr == PJERR))
        fatal("DBENGINE CACHE: corrupted sections judy array");

    if(unlikely(!metrics_judy_pptr)) {
        // section does not exist
        goto cleanup;
    }

    // level 2: metric
    Pvoid_t *pages_judy_pptr = JudyLGet(*metrics_judy_pptr, metric_id, PJE0);
    if(unlikely(pages_judy_pptr == PJERR))
        fatal("DBENGINE CACHE: corrupted pages judy array");

    if(unlikely(!pages_judy_pptr)) {
        // metric does not exist
        goto cleanup;
    }

    // level 3: page by start time, per the requested search method
    switch(method) {
        default:
        case PGC_SEARCH_CLOSEST: {
            // exact match first; otherwise the page covering start_time_s;
            // otherwise the first page after it
            Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, start_time_s, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;

            else {
                Word_t time = start_time_s;

                // find the previous page
                page_ptr = JudyLPrev(*pages_judy_pptr, &time, PJE0);
                if(unlikely(page_ptr == PJERR))
                    fatal("DBENGINE CACHE: corrupted page in pages judy array #2");

                if(page_ptr) {
                    // found a page starting before our timestamp
                    // check if our timestamp is included
                    page = *page_ptr;
                    if(start_time_s > page->end_time_s)
                        // it is not good for us
                        page = NULL;
                }

                if(!page) {
                    // find the next page then...
                    time = start_time_s;
                    page_ptr = JudyLNext(*pages_judy_pptr, &time, PJE0);
                    if(page_ptr)
                        page = *page_ptr;
                }
            }
        }
        break;

        case PGC_SEARCH_EXACT: {
            Pvoid_t *page_ptr = JudyLGet(*pages_judy_pptr, start_time_s, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_FIRST: {
            // first page at or after start_time_s
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLFirst(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_NEXT: {
            // first page strictly after start_time_s
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLNext(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_LAST: {
            // last page at or before start_time_s
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLLast(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;

        case PGC_SEARCH_PREV: {
            // last page strictly before start_time_s
            Word_t time = start_time_s;
            Pvoid_t *page_ptr = JudyLPrev(*pages_judy_pptr, &time, PJE0);
            if (unlikely(page_ptr == PJERR))
                fatal("DBENGINE CACHE: corrupted page in pages judy array");

            if (page_ptr)
                page = *page_ptr;
        }
        break;
    }

    if(page) {
        pointer_check(cache, page);

        if(!page_acquire(cache, page)) {
            // this page is not good to use
            page = NULL;
        }
    }

cleanup:
    pgc_index_read_unlock(cache, partition);

    if(page) {
        __atomic_add_fetch(stats_hit_ptr, 1, __ATOMIC_RELAXED);
        page_has_been_accessed(cache, page);
    }
    else
        __atomic_add_fetch(stats_miss_ptr, 1, __ATOMIC_RELAXED);

    __atomic_sub_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED);

    return page;
}
-
-static void all_hot_pages_to_dirty(PGC *cache, Word_t section) {
- pgc_ll_lock(cache, &cache->hot);
-
- bool first = true;
- Word_t last_section = (section == PGC_SECTION_ALL) ? 0 : section;
- Pvoid_t *section_pages_pptr;
- while ((section_pages_pptr = JudyLFirstThenNext(cache->hot.sections_judy, &last_section, &first))) {
- if(section != PGC_SECTION_ALL && last_section != section)
- break;
-
- struct section_pages *sp = *section_pages_pptr;
-
- PGC_PAGE *page = sp->base;
- while(page) {
- PGC_PAGE *next = page->link.next;
-
- if(page_acquire(cache, page)) {
- page_set_dirty(cache, page, true);
- page_release(cache, page, false);
- // page ptr may be invalid now
- }
-
- page = next;
- }
- }
- pgc_ll_unlock(cache, &cache->hot);
-}
-
-// returns true when there is more work to do
-static bool flush_pages(PGC *cache, size_t max_flushes, Word_t section, bool wait, bool all_of_them) {
- internal_fatal(!cache->dirty.linked_list_in_sections_judy,
- "wrong dirty pages configuration - dirty pages need to have a judy array, not a linked list");
-
- if(!all_of_them && !wait) {
- // we have been called from a data collection thread
- // let's not waste its time...
-
- if(!pgc_ll_trylock(cache, &cache->dirty)) {
- // we would block, so give up...
- return true;
- }
-
- // we got the lock at this point
- }
- else
- pgc_ll_lock(cache, &cache->dirty);
-
- size_t optimal_flush_size = cache->config.max_dirty_pages_per_call;
- size_t dirty_version_at_entry = cache->dirty.version;
- if(!all_of_them && (cache->dirty.stats->entries < optimal_flush_size || cache->dirty.last_version_checked == dirty_version_at_entry)) {
- pgc_ll_unlock(cache, &cache->dirty);
- return false;
- }
-
- __atomic_add_fetch(&cache->stats.workers_flush, 1, __ATOMIC_RELAXED);
-
- bool have_dirty_lock = true;
-
- if(all_of_them || !max_flushes)
- max_flushes = SIZE_MAX;
-
- Word_t last_section = (section == PGC_SECTION_ALL) ? 0 : section;
- size_t flushes_so_far = 0;
- Pvoid_t *section_pages_pptr;
- bool stopped_before_finishing = false;
- size_t spins = 0;
- bool first = true;
-
- while (have_dirty_lock && (section_pages_pptr = JudyLFirstThenNext(cache->dirty.sections_judy, &last_section, &first))) {
- if(section != PGC_SECTION_ALL && last_section != section)
- break;
-
- struct section_pages *sp = *section_pages_pptr;
- if(!all_of_them && sp->entries < optimal_flush_size)
- continue;
-
- if(!all_of_them && flushes_so_far > max_flushes) {
- stopped_before_finishing = true;
- break;
- }
-
- if(++spins > 1)
- __atomic_add_fetch(&cache->stats.flush_spins, 1, __ATOMIC_RELAXED);
-
- PGC_ENTRY array[optimal_flush_size];
- PGC_PAGE *pages[optimal_flush_size];
- size_t pages_added = 0, pages_added_size = 0;
- size_t pages_removed_dirty = 0, pages_removed_dirty_size = 0;
- size_t pages_cancelled = 0, pages_cancelled_size = 0;
- size_t pages_made_clean = 0, pages_made_clean_size = 0;
-
- PGC_PAGE *page = sp->base;
- while (page && pages_added < optimal_flush_size) {
- PGC_PAGE *next = page->link.next;
-
- internal_fatal(page_get_status_flags(page) != PGC_PAGE_DIRTY,
- "DBENGINE CACHE: page should be in the dirty list before saved");
-
- if (page_acquire(cache, page)) {
- internal_fatal(page_get_status_flags(page) != PGC_PAGE_DIRTY,
- "DBENGINE CACHE: page should be in the dirty list before saved");
-
- internal_fatal(page->section != last_section,
- "DBENGINE CACHE: dirty page is not in the right section (tier)");
-
- if(!page_transition_trylock(cache, page)) {
- page_release(cache, page, false);
- // page ptr may be invalid now
- }
- else {
- pages[pages_added] = page;
- array[pages_added] = (PGC_ENTRY) {
- .section = page->section,
- .metric_id = page->metric_id,
- .start_time_s = page->start_time_s,
- .end_time_s = __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
- .update_every_s = page->update_every_s,
- .size = page_size_from_assumed_size(cache, page->assumed_size),
- .data = page->data,
- .custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL,
- .hot = false,
- };
-
- pages_added_size += page->assumed_size;
- pages_added++;
- }
- }
-
- page = next;
- }
-
- // do we have enough to save?
- if(all_of_them || pages_added == optimal_flush_size) {
- // we should do it
-
- for (size_t i = 0; i < pages_added; i++) {
- PGC_PAGE *tpg = pages[i];
-
- internal_fatal(page_get_status_flags(tpg) != PGC_PAGE_DIRTY,
- "DBENGINE CACHE: page should be in the dirty list before saved");
-
- __atomic_add_fetch(&cache->stats.flushing_entries, 1, __ATOMIC_RELAXED);
- __atomic_add_fetch(&cache->stats.flushing_size, tpg->assumed_size, __ATOMIC_RELAXED);
-
- // remove it from the dirty list
- pgc_ll_del(cache, &cache->dirty, tpg, true);
-
- pages_removed_dirty_size += tpg->assumed_size;
- pages_removed_dirty++;
- }
-
- // next time, repeat the same section (tier)
- first = true;
- }
- else {
- // we can't do it
-
- for (size_t i = 0; i < pages_added; i++) {
- PGC_PAGE *tpg = pages[i];
-
- internal_fatal(page_get_status_flags(tpg) != PGC_PAGE_DIRTY,
- "DBENGINE CACHE: page should be in the dirty list before saved");
-
- pages_cancelled_size += tpg->assumed_size;
- pages_cancelled++;
-
- page_transition_unlock(cache, tpg);
- page_release(cache, tpg, false);
- // page ptr may be invalid now
- }
-
- __atomic_add_fetch(&cache->stats.flushes_cancelled, pages_cancelled, __ATOMIC_RELAXED);
- __atomic_add_fetch(&cache->stats.flushes_cancelled_size, pages_cancelled_size, __ATOMIC_RELAXED);
-
- internal_fatal(pages_added != pages_cancelled || pages_added_size != pages_cancelled_size,
- "DBENGINE CACHE: flushing cancel pages mismatch");
-
- // next time, continue to the next section (tier)
- first = false;
- continue;
- }
-
- if(cache->config.pgc_save_init_cb)
- cache->config.pgc_save_init_cb(cache, last_section);
-
- pgc_ll_unlock(cache, &cache->dirty);
- have_dirty_lock = false;
-
- // call the callback to save them
- // it may take some time, so let's release the lock
- cache->config.pgc_save_dirty_cb(cache, array, pages, pages_added);
- flushes_so_far++;
-
- __atomic_add_fetch(&cache->stats.flushes_completed, pages_added, __ATOMIC_RELAXED);
- __atomic_add_fetch(&cache->stats.flushes_completed_size, pages_added_size, __ATOMIC_RELAXED);
-
- size_t pages_to_evict = 0; (void)pages_to_evict;
- for (size_t i = 0; i < pages_added; i++) {
- PGC_PAGE *tpg = pages[i];
-
- internal_fatal(page_get_status_flags(tpg) != 0,
- "DBENGINE CACHE: page should not be in any list while it is being saved");
-
- __atomic_sub_fetch(&cache->stats.flushing_entries, 1, __ATOMIC_RELAXED);
- __atomic_sub_fetch(&cache->stats.flushing_size, tpg->assumed_size, __ATOMIC_RELAXED);
-
- pages_made_clean_size += tpg->assumed_size;
- pages_made_clean++;
-
- if(!tpg->accesses)
- pages_to_evict++;
-
- page_set_clean(cache, tpg, true, false);
- page_transition_unlock(cache, tpg);
- page_release(cache, tpg, false);
- // tpg ptr may be invalid now
- }
-
- internal_fatal(pages_added != pages_made_clean || pages_added != pages_removed_dirty ||
- pages_added_size != pages_made_clean_size || pages_added_size != pages_removed_dirty_size
- , "DBENGINE CACHE: flushing pages mismatch");
-
- if(!all_of_them && !wait) {
- if(pgc_ll_trylock(cache, &cache->dirty))
- have_dirty_lock = true;
-
- else {
- stopped_before_finishing = true;
- have_dirty_lock = false;
- }
- }
- else {
- pgc_ll_lock(cache, &cache->dirty);
- have_dirty_lock = true;
- }
- }
-
- if(have_dirty_lock) {
- if(!stopped_before_finishing && dirty_version_at_entry > cache->dirty.last_version_checked)
- cache->dirty.last_version_checked = dirty_version_at_entry;
-
- pgc_ll_unlock(cache, &cache->dirty);
- }
-
- __atomic_sub_fetch(&cache->stats.workers_flush, 1, __ATOMIC_RELAXED);
-
- return stopped_before_finishing;
-}
-
-void free_all_unreferenced_clean_pages(PGC *cache) {
- evict_pages(cache, 0, 0, true, true);
-}
-
-// ----------------------------------------------------------------------------
-// public API
-
-PGC *pgc_create(const char *name,
- size_t clean_size_bytes, free_clean_page_callback pgc_free_cb,
- size_t max_dirty_pages_per_flush,
- save_dirty_init_callback pgc_save_init_cb,
- save_dirty_page_callback pgc_save_dirty_cb,
- size_t max_pages_per_inline_eviction, size_t max_inline_evictors,
- size_t max_skip_pages_per_inline_eviction,
- size_t max_flushes_inline,
- PGC_OPTIONS options, size_t partitions, size_t additional_bytes_per_page) {
-
- if(max_pages_per_inline_eviction < 2)
- max_pages_per_inline_eviction = 2;
-
- if(max_dirty_pages_per_flush < 1)
- max_dirty_pages_per_flush = 1;
-
- if(max_flushes_inline * max_dirty_pages_per_flush < 2)
- max_flushes_inline = 2;
-
- PGC *cache = callocz(1, sizeof(PGC));
- strncpyz(cache->config.name, name, PGC_NAME_MAX);
- cache->config.options = options;
- cache->config.clean_size = (clean_size_bytes < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : clean_size_bytes;
- cache->config.pgc_free_clean_cb = pgc_free_cb;
- cache->config.max_dirty_pages_per_call = max_dirty_pages_per_flush;
- cache->config.pgc_save_init_cb = pgc_save_init_cb;
- cache->config.pgc_save_dirty_cb = pgc_save_dirty_cb;
- cache->config.max_pages_per_inline_eviction = max_pages_per_inline_eviction;
- cache->config.max_skip_pages_per_inline_eviction = (max_skip_pages_per_inline_eviction < 2) ? 2 : max_skip_pages_per_inline_eviction;
- cache->config.max_flushes_inline = (max_flushes_inline < 1) ? 1 : max_flushes_inline;
- cache->config.partitions = partitions < 1 ? (size_t)get_netdata_cpus() : partitions;
- cache->config.additional_bytes_per_page = additional_bytes_per_page;
-
- cache->config.max_workers_evict_inline = max_inline_evictors;
- cache->config.severe_pressure_per1000 = 1010;
- cache->config.aggressive_evict_per1000 = 990;
- cache->config.healthy_size_per1000 = 980;
- cache->config.evict_low_threshold_per1000 = 970;
-
- cache->index = callocz(cache->config.partitions, sizeof(struct pgc_index));
-
- for(size_t part = 0; part < cache->config.partitions ; part++)
- rw_spinlock_init(&cache->index[part].rw_spinlock);
-
- spinlock_init(&cache->hot.spinlock);
- spinlock_init(&cache->dirty.spinlock);
- spinlock_init(&cache->clean.spinlock);
-
- cache->hot.flags = PGC_PAGE_HOT;
- cache->hot.linked_list_in_sections_judy = true;
- cache->hot.stats = &cache->stats.queues.hot;
-
- cache->dirty.flags = PGC_PAGE_DIRTY;
- cache->dirty.linked_list_in_sections_judy = true;
- cache->dirty.stats = &cache->stats.queues.dirty;
-
- cache->clean.flags = PGC_PAGE_CLEAN;
- cache->clean.linked_list_in_sections_judy = false;
- cache->clean.stats = &cache->stats.queues.clean;
-
- pgc_section_pages_static_aral_init();
-
-#ifdef PGC_WITH_ARAL
- cache->aral = callocz(cache->config.partitions, sizeof(ARAL *));
- for(size_t part = 0; part < cache->config.partitions ; part++) {
- char buf[100 +1];
- snprintfz(buf, sizeof(buf) - 1, "%s[%zu]", name, part);
- cache->aral[part] = aral_create(
- buf,
- sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page,
- 0,
- 16384,
- aral_statistics(pgc_section_pages_aral),
- NULL, NULL, false, false);
- }
-#endif
-
- pointer_index_init(cache);
-
- return cache;
-}
-
-struct aral_statistics *pgc_aral_statistics(void) {
- return aral_statistics(pgc_section_pages_aral);
-}
-
-size_t pgc_aral_structures(void) {
- return aral_structures(pgc_section_pages_aral);
-}
-
-size_t pgc_aral_overhead(void) {
- return aral_overhead(pgc_section_pages_aral);
-}
-
-void pgc_flush_all_hot_and_dirty_pages(PGC *cache, Word_t section) {
- all_hot_pages_to_dirty(cache, section);
-
- // save all dirty pages to make them clean
- flush_pages(cache, 0, section, true, true);
-}
-
-void pgc_destroy(PGC *cache) {
- // convert all hot pages to dirty
- all_hot_pages_to_dirty(cache, PGC_SECTION_ALL);
-
- // save all dirty pages to make them clean
- flush_pages(cache, 0, PGC_SECTION_ALL, true, true);
-
- // free all unreferenced clean pages
- free_all_unreferenced_clean_pages(cache);
-
- if(PGC_REFERENCED_PAGES(cache))
- netdata_log_error("DBENGINE CACHE: there are %zu referenced cache pages - leaving the cache allocated", PGC_REFERENCED_PAGES(cache));
- else {
- pointer_destroy_index(cache);
-
-// for(size_t part = 0; part < cache->config.partitions ; part++)
-// netdata_rwlock_destroy(&cache->index[part].rw_spinlock);
-
-#ifdef PGC_WITH_ARAL
- for(size_t part = 0; part < cache->config.partitions ; part++)
- aral_destroy(cache->aral[part]);
-
- freez(cache->aral);
-#endif
- freez(cache->index);
- freez(cache);
- }
-}
-
-PGC_PAGE *pgc_page_add_and_acquire(PGC *cache, PGC_ENTRY entry, bool *added) {
- return page_add(cache, &entry, added);
-}
-
-PGC_PAGE *pgc_page_dup(PGC *cache, PGC_PAGE *page) {
- if(!page_acquire(cache, page))
- fatal("DBENGINE CACHE: tried to dup a page that is not acquired!");
-
- return page;
-}
-
-void pgc_page_release(PGC *cache, PGC_PAGE *page) {
- page_release(cache, page, is_page_clean(page));
-}
-
-void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page) {
- __atomic_add_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);
-
-//#ifdef NETDATA_INTERNAL_CHECKS
-// page_transition_lock(cache, page);
-// internal_fatal(!is_page_hot(page), "DBENGINE CACHE: called %s() but page is not hot", __FUNCTION__ );
-// page_transition_unlock(cache, page);
-//#endif
-
- // make page dirty
- page_set_dirty(cache, page, false);
-
- // release the page
- page_release(cache, page, true);
- // page ptr may be invalid now
-
- __atomic_sub_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);
-
- // flush, if we have to
- if((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache)) {
- flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL,
- false, false);
- }
-}
-
-bool pgc_page_to_clean_evict_or_release(PGC *cache, PGC_PAGE *page) {
- bool ret;
-
- __atomic_add_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);
-
- // prevent accesses from increasing the accesses counter
- page_flag_set(page, PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES);
-
- // zero the accesses counter
- __atomic_store_n(&page->accesses, 0, __ATOMIC_RELEASE);
-
- // if there are no other references to it, evict it immediately
- if(make_acquired_page_clean_and_evict_or_page_release(cache, page)) {
- __atomic_add_fetch(&cache->stats.hot_empty_pages_evicted_immediately, 1, __ATOMIC_RELAXED);
- ret = true;
- }
- else {
- __atomic_add_fetch(&cache->stats.hot_empty_pages_evicted_later, 1, __ATOMIC_RELAXED);
- ret = false;
- }
-
- __atomic_sub_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED);
-
- return ret;
-}
-
-Word_t pgc_page_section(PGC_PAGE *page) {
- return page->section;
-}
-
-Word_t pgc_page_metric(PGC_PAGE *page) {
- return page->metric_id;
-}
-
-time_t pgc_page_start_time_s(PGC_PAGE *page) {
- return page->start_time_s;
-}
-
-time_t pgc_page_end_time_s(PGC_PAGE *page) {
- return page->end_time_s;
-}
-
-time_t pgc_page_update_every_s(PGC_PAGE *page) {
- return page->update_every_s;
-}
-
-time_t pgc_page_fix_update_every(PGC_PAGE *page, time_t update_every_s) {
- if(page->update_every_s == 0)
- page->update_every_s = (uint32_t) update_every_s;
-
- return page->update_every_s;
-}
-
-time_t pgc_page_fix_end_time_s(PGC_PAGE *page, time_t end_time_s) {
- page->end_time_s = end_time_s;
- return page->end_time_s;
-}
-
-void *pgc_page_data(PGC_PAGE *page) {
- return page->data;
-}
-
-void *pgc_page_custom_data(PGC *cache, PGC_PAGE *page) {
- if(cache->config.additional_bytes_per_page)
- return page->custom_data;
-
- return NULL;
-}
-
-size_t pgc_page_data_size(PGC *cache, PGC_PAGE *page) {
- return page_size_from_assumed_size(cache, page->assumed_size);
-}
-
-bool pgc_is_page_hot(PGC_PAGE *page) {
- return is_page_hot(page);
-}
-
-bool pgc_is_page_dirty(PGC_PAGE *page) {
- return is_page_dirty(page);
-}
-
-bool pgc_is_page_clean(PGC_PAGE *page) {
- return is_page_clean(page);
-}
-
-void pgc_reset_hot_max(PGC *cache) {
- size_t entries = __atomic_load_n(&cache->hot.stats->entries, __ATOMIC_RELAXED);
- size_t size = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED);
-
- __atomic_store_n(&cache->hot.stats->max_entries, entries, __ATOMIC_RELAXED);
- __atomic_store_n(&cache->hot.stats->max_size, size, __ATOMIC_RELAXED);
-
- size_t size_to_evict = 0;
- cache_usage_per1000(cache, &size_to_evict);
- evict_pages(cache, 0, 0, true, false);
-}
-
-void pgc_set_dynamic_target_cache_size_callback(PGC *cache, dynamic_target_cache_size_callback callback) {
- cache->config.dynamic_target_size_cb = callback;
-
- size_t size_to_evict = 0;
- cache_usage_per1000(cache, &size_to_evict);
- evict_pages(cache, 0, 0, true, false);
-}
-
-size_t pgc_get_current_cache_size(PGC *cache) {
- cache_usage_per1000(cache, NULL);
- return __atomic_load_n(&cache->stats.current_cache_size, __ATOMIC_RELAXED);
-}
-
-size_t pgc_get_wanted_cache_size(PGC *cache) {
- cache_usage_per1000(cache, NULL);
- return __atomic_load_n(&cache->stats.wanted_cache_size, __ATOMIC_RELAXED);
-}
-
-bool pgc_evict_pages(PGC *cache, size_t max_skip, size_t max_evict) {
- bool under_pressure = cache_needs_space_aggressively(cache);
- return evict_pages(cache,
- under_pressure ? 0 : max_skip,
- under_pressure ? 0 : max_evict,
- true, false);
-}
-
-bool pgc_flush_pages(PGC *cache, size_t max_flushes) {
- bool under_pressure = flushing_critical(cache);
- return flush_pages(cache, under_pressure ? 0 : max_flushes, PGC_SECTION_ALL, true, false);
-}
-
-void pgc_page_hot_set_end_time_s(PGC *cache __maybe_unused, PGC_PAGE *page, time_t end_time_s) {
- internal_fatal(!is_page_hot(page),
- "DBENGINE CACHE: end_time_s update on non-hot page");
-
- internal_fatal(end_time_s < __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
- "DBENGINE CACHE: end_time_s is not bigger than existing");
-
- __atomic_store_n(&page->end_time_s, end_time_s, __ATOMIC_RELAXED);
-
-#ifdef PGC_COUNT_POINTS_COLLECTED
- __atomic_add_fetch(&cache->stats.points_collected, 1, __ATOMIC_RELAXED);
-#endif
-}
-
-PGC_PAGE *pgc_page_get_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) {
- return page_find_and_acquire(cache, section, metric_id, start_time_s, method);
-}
-
-struct pgc_statistics pgc_get_statistics(PGC *cache) {
- // FIXME - get the statistics atomically
- return cache->stats;
-}
-
-size_t pgc_hot_and_dirty_entries(PGC *cache) {
- size_t entries = 0;
-
- entries += __atomic_load_n(&cache->hot.stats->entries, __ATOMIC_RELAXED);
- entries += __atomic_load_n(&cache->dirty.stats->entries, __ATOMIC_RELAXED);
- entries += __atomic_load_n(&cache->stats.flushing_entries, __ATOMIC_RELAXED);
- entries += __atomic_load_n(&cache->stats.hot2dirty_entries, __ATOMIC_RELAXED);
-
- return entries;
-}
-
-void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_fileno, uint8_t type, migrate_to_v2_callback cb, void *data) {
- __atomic_add_fetch(&rrdeng_cache_efficiency_stats.journal_v2_indexing_started, 1, __ATOMIC_RELAXED);
- __atomic_add_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED);
-
- pgc_ll_lock(cache, &cache->hot);
-
- Pvoid_t JudyL_metrics = NULL;
- Pvoid_t JudyL_extents_pos = NULL;
-
- size_t count_of_unique_extents = 0;
- size_t count_of_unique_metrics = 0;
- size_t count_of_unique_pages = 0;
-
- size_t master_extent_index_id = 0;
-
- Pvoid_t *section_pages_pptr = JudyLGet(cache->hot.sections_judy, section, PJE0);
- if(!section_pages_pptr) {
- pgc_ll_unlock(cache, &cache->hot);
- return;
- }
-
- struct section_pages *sp = *section_pages_pptr;
- if(!spinlock_trylock(&sp->migration_to_v2_spinlock)) {
- netdata_log_info("DBENGINE: migration to journal v2 for datafile %u is postponed, another jv2 indexer is already running for this section", datafile_fileno);
- pgc_ll_unlock(cache, &cache->hot);
- return;
- }
-
- ARAL *ar_mi = aral_by_size_acquire(sizeof(struct jv2_metrics_info));
- ARAL *ar_pi = aral_by_size_acquire(sizeof(struct jv2_page_info));
- ARAL *ar_ei = aral_by_size_acquire(sizeof(struct jv2_extents_info));
-
- for(PGC_PAGE *page = sp->base; page ; page = page->link.next) {
- struct extent_io_data *xio = (struct extent_io_data *)page->custom_data;
- if(xio->fileno != datafile_fileno) continue;
-
- if(page_flag_check(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2)) {
- internal_fatal(true, "Migration to journal v2: page has already been migrated to v2");
- continue;
- }
-
- if(!page_transition_trylock(cache, page)) {
- internal_fatal(true, "Migration to journal v2: cannot get page transition lock");
- continue;
- }
-
- if(!page_acquire(cache, page)) {
- internal_fatal(true, "Migration to journal v2: cannot acquire page for migration to v2");
- continue;
- }
-
- page_flag_set(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2);
-
- pgc_ll_unlock(cache, &cache->hot);
-
- // update the extents JudyL
-
- size_t current_extent_index_id;
- Pvoid_t *PValue = JudyLIns(&JudyL_extents_pos, xio->pos, PJE0);
- if(!PValue || *PValue == PJERR)
- fatal("Corrupted JudyL extents pos");
-
- struct jv2_extents_info *ei;
- if(!*PValue) {
- ei = aral_mallocz(ar_ei); // callocz(1, sizeof(struct jv2_extents_info));
- ei->pos = xio->pos;
- ei->bytes = xio->bytes;
- ei->number_of_pages = 1;
- ei->index = master_extent_index_id++;
- *PValue = ei;
-
- count_of_unique_extents++;
- }
- else {
- ei = *PValue;
- ei->number_of_pages++;
- }
-
- current_extent_index_id = ei->index;
-
- // update the metrics JudyL
-
- PValue = JudyLIns(&JudyL_metrics, page->metric_id, PJE0);
- if(!PValue || *PValue == PJERR)
- fatal("Corrupted JudyL metrics");
-
- struct jv2_metrics_info *mi;
- if(!*PValue) {
- mi = aral_mallocz(ar_mi); // callocz(1, sizeof(struct jv2_metrics_info));
- mi->uuid = mrg_metric_uuid(main_mrg, (METRIC *)page->metric_id);
- mi->first_time_s = page->start_time_s;
- mi->last_time_s = page->end_time_s;
- mi->number_of_pages = 1;
- mi->page_list_header = 0;
- mi->JudyL_pages_by_start_time = NULL;
- *PValue = mi;
-
- count_of_unique_metrics++;
- }
- else {
- mi = *PValue;
- mi->number_of_pages++;
- if(page->start_time_s < mi->first_time_s)
- mi->first_time_s = page->start_time_s;
- if(page->end_time_s > mi->last_time_s)
- mi->last_time_s = page->end_time_s;
- }
-
- PValue = JudyLIns(&mi->JudyL_pages_by_start_time, page->start_time_s, PJE0);
- if(!PValue || *PValue == PJERR)
- fatal("Corrupted JudyL metric pages");
-
- if(!*PValue) {
- struct jv2_page_info *pi = aral_mallocz(ar_pi); // callocz(1, (sizeof(struct jv2_page_info)));
- pi->start_time_s = page->start_time_s;
- pi->end_time_s = page->end_time_s;
- pi->update_every_s = page->update_every_s;
- pi->page_length = page_size_from_assumed_size(cache, page->assumed_size);
- pi->page = page;
- pi->extent_index = current_extent_index_id;
- pi->custom_data = (cache->config.additional_bytes_per_page) ? page->custom_data : NULL;
- *PValue = pi;
-
- count_of_unique_pages++;
- }
- else {
- // impossible situation
- internal_fatal(true, "Page is already in JudyL metric pages");
- page_flag_clear(page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2);
- page_transition_unlock(cache, page);
- page_release(cache, page, false);
- }
-
- pgc_ll_lock(cache, &cache->hot);
- }
-
- spinlock_unlock(&sp->migration_to_v2_spinlock);
- pgc_ll_unlock(cache, &cache->hot);
-
- // callback
- cb(section, datafile_fileno, type, JudyL_metrics, JudyL_extents_pos, count_of_unique_extents, count_of_unique_metrics, count_of_unique_pages, data);
-
- {
- Pvoid_t *PValue1;
- bool metric_id_first = true;
- Word_t metric_id = 0;
- while ((PValue1 = JudyLFirstThenNext(JudyL_metrics, &metric_id, &metric_id_first))) {
- struct jv2_metrics_info *mi = *PValue1;
-
- Pvoid_t *PValue2;
- bool start_time_first = true;
- Word_t start_time = 0;
- while ((PValue2 = JudyLFirstThenNext(mi->JudyL_pages_by_start_time, &start_time, &start_time_first))) {
- struct jv2_page_info *pi = *PValue2;
- page_transition_unlock(cache, pi->page);
- pgc_page_hot_to_dirty_and_release(cache, pi->page);
- // make_acquired_page_clean_and_evict_or_page_release(cache, pi->page);
- aral_freez(ar_pi, pi);
- }
-
- JudyLFreeArray(&mi->JudyL_pages_by_start_time, PJE0);
- aral_freez(ar_mi, mi);
- }
- JudyLFreeArray(&JudyL_metrics, PJE0);
- }
-
- {
- Pvoid_t *PValue;
- bool extent_pos_first = true;
- Word_t extent_pos = 0;
- while ((PValue = JudyLFirstThenNext(JudyL_extents_pos, &extent_pos, &extent_pos_first))) {
- struct jv2_extents_info *ei = *PValue;
- aral_freez(ar_ei, ei);
- }
- JudyLFreeArray(&JudyL_extents_pos, PJE0);
- }
-
- aral_by_size_release(ar_ei);
- aral_by_size_release(ar_pi);
- aral_by_size_release(ar_mi);
-
- __atomic_sub_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED);
-}
-
-static bool match_page_data(PGC_PAGE *page, void *data) {
- return (page->data == data);
-}
-
-void pgc_open_evict_clean_pages_of_datafile(PGC *cache, struct rrdengine_datafile *datafile) {
- evict_pages_with_filter(cache, 0, 0, true, true, match_page_data, datafile);
-}
-
-size_t pgc_count_clean_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr) {
- size_t found = 0;
-
- pgc_ll_lock(cache, &cache->clean);
- for(PGC_PAGE *page = cache->clean.base; page ;page = page->link.next)
- found += (page->data == ptr && page->section == section) ? 1 : 0;
- pgc_ll_unlock(cache, &cache->clean);
-
- return found;
-}
-
-size_t pgc_count_hot_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr) {
- size_t found = 0;
-
- pgc_ll_lock(cache, &cache->hot);
- Pvoid_t *section_pages_pptr = JudyLGet(cache->hot.sections_judy, section, PJE0);
- if(section_pages_pptr) {
- struct section_pages *sp = *section_pages_pptr;
- for(PGC_PAGE *page = sp->base; page ;page = page->link.next)
- found += (page->data == ptr) ? 1 : 0;
- }
- pgc_ll_unlock(cache, &cache->hot);
-
- return found;
-}
-
-// ----------------------------------------------------------------------------
-// unittest
-
-static void unittest_free_clean_page_callback(PGC *cache __maybe_unused, PGC_ENTRY entry __maybe_unused) {
- ;
-}
-
-static void unittest_save_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused) {
- ;
-}
-
-#ifdef PGC_STRESS_TEST
-
-struct {
- bool stop;
- PGC *cache;
- PGC_PAGE **metrics;
- size_t clean_metrics;
- size_t hot_metrics;
- time_t first_time_t;
- time_t last_time_t;
- size_t cache_size;
- size_t query_threads;
- size_t collect_threads;
- size_t partitions;
- size_t points_per_page;
- time_t time_per_collection_ut;
- time_t time_per_query_ut;
- time_t time_per_flush_ut;
- PGC_OPTIONS options;
- char rand_statebufs[1024];
- struct random_data *random_data;
-} pgc_uts = {
- .stop = false,
- .metrics = NULL,
- .clean_metrics = 100000,
- .hot_metrics = 1000000,
- .first_time_t = 100000000,
- .last_time_t = 0,
- .cache_size = 0, // get the default (8MB)
- .collect_threads = 16,
- .query_threads = 16,
- .partitions = 0, // get the default (system cpus)
- .options = PGC_OPTIONS_AUTOSCALE,/* PGC_OPTIONS_FLUSH_PAGES_INLINE | PGC_OPTIONS_EVICT_PAGES_INLINE,*/
- .points_per_page = 10,
- .time_per_collection_ut = 1000000,
- .time_per_query_ut = 250,
- .time_per_flush_ut = 100,
- .rand_statebufs = {},
- .random_data = NULL,
-};
-
-void *unittest_stress_test_collector(void *ptr) {
- size_t id = *((size_t *)ptr);
-
- size_t metric_start = pgc_uts.clean_metrics;
- size_t metric_end = pgc_uts.clean_metrics + pgc_uts.hot_metrics;
- size_t number_of_metrics = metric_end - metric_start;
- size_t per_collector_metrics = number_of_metrics / pgc_uts.collect_threads;
- metric_start = metric_start + per_collector_metrics * id + 1;
- metric_end = metric_start + per_collector_metrics - 1;
-
- time_t start_time_t = pgc_uts.first_time_t + 1;
-
- heartbeat_t hb;
- heartbeat_init(&hb);
-
- while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
- // netdata_log_info("COLLECTOR %zu: collecting metrics %zu to %zu, from %ld to %lu", id, metric_start, metric_end, start_time_t, start_time_t + pgc_uts.points_per_page);
-
- netdata_thread_disable_cancelability();
-
- for (size_t i = metric_start; i < metric_end; i++) {
- bool added;
-
- pgc_uts.metrics[i] = pgc_page_add_and_acquire(pgc_uts.cache, (PGC_ENTRY) {
- .section = 1,
- .metric_id = i,
- .start_time_t = start_time_t,
- .end_time_t = start_time_t,
- .update_every = 1,
- .size = 4096,
- .data = NULL,
- .hot = true,
- }, &added);
-
- if(!pgc_is_page_hot(pgc_uts.metrics[i]) || !added) {
- pgc_page_release(pgc_uts.cache, pgc_uts.metrics[i]);
- pgc_uts.metrics[i] = NULL;
- }
- }
-
- time_t end_time_t = start_time_t + (time_t)pgc_uts.points_per_page;
- while(++start_time_t <= end_time_t && !__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
- heartbeat_next(&hb, pgc_uts.time_per_collection_ut);
-
- for (size_t i = metric_start; i < metric_end; i++) {
- if(pgc_uts.metrics[i])
- pgc_page_hot_set_end_time_t(pgc_uts.cache, pgc_uts.metrics[i], start_time_t);
- }
-
- __atomic_store_n(&pgc_uts.last_time_t, start_time_t, __ATOMIC_RELAXED);
- }
-
- for (size_t i = metric_start; i < metric_end; i++) {
- if (pgc_uts.metrics[i]) {
- if(i % 10 == 0)
- pgc_page_to_clean_evict_or_release(pgc_uts.cache, pgc_uts.metrics[i]);
- else
- pgc_page_hot_to_dirty_and_release(pgc_uts.cache, pgc_uts.metrics[i]);
- }
- }
-
- netdata_thread_enable_cancelability();
- }
-
- return ptr;
-}
-
-void *unittest_stress_test_queries(void *ptr) {
- size_t id = *((size_t *)ptr);
- struct random_data *random_data = &pgc_uts.random_data[id];
-
- size_t start = 0;
- size_t end = pgc_uts.clean_metrics + pgc_uts.hot_metrics;
-
- while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
- netdata_thread_disable_cancelability();
-
- int32_t random_number;
- random_r(random_data, &random_number);
-
- size_t metric_id = random_number % (end - start);
- time_t start_time_t = pgc_uts.first_time_t;
- time_t end_time_t = __atomic_load_n(&pgc_uts.last_time_t, __ATOMIC_RELAXED);
- if(end_time_t <= start_time_t)
- end_time_t = start_time_t + 1;
- size_t pages = (end_time_t - start_time_t) / pgc_uts.points_per_page + 1;
-
- PGC_PAGE *array[pages];
- for(size_t i = 0; i < pages ;i++)
- array[i] = NULL;
-
- // find the pages the cache has
- for(size_t i = 0; i < pages ;i++) {
- time_t page_start_time = start_time_t + (time_t)(i * pgc_uts.points_per_page);
- array[i] = pgc_page_get_and_acquire(pgc_uts.cache, 1, metric_id,
- page_start_time, (i < pages - 1)?PGC_SEARCH_EXACT:PGC_SEARCH_CLOSEST);
- }
-
- // load the rest of the pages
- for(size_t i = 0; i < pages ;i++) {
- if(array[i]) continue;
-
- time_t page_start_time = start_time_t + (time_t)(i * pgc_uts.points_per_page);
- array[i] = pgc_page_add_and_acquire(pgc_uts.cache, (PGC_ENTRY) {
- .section = 1,
- .metric_id = metric_id,
- .start_time_t = page_start_time,
- .end_time_t = page_start_time + (time_t)pgc_uts.points_per_page,
- .update_every = 1,
- .size = 4096,
- .data = NULL,
- .hot = false,
- }, NULL);
- }
-
- // do the query
- // ...
- struct timespec work_duration = {.tv_sec = 0, .tv_nsec = pgc_uts.time_per_query_ut * NSEC_PER_USEC };
- nanosleep(&work_duration, NULL);
-
- // release the pages
- for(size_t i = 0; i < pages ;i++) {
- if(!array[i]) continue;
- pgc_page_release(pgc_uts.cache, array[i]);
- array[i] = NULL;
- }
-
- netdata_thread_enable_cancelability();
- }
-
- return ptr;
-}
-
-void *unittest_stress_test_service(void *ptr) {
- heartbeat_t hb;
- heartbeat_init(&hb);
- while(!__atomic_load_n(&pgc_uts.stop, __ATOMIC_RELAXED)) {
- heartbeat_next(&hb, 1 * USEC_PER_SEC);
-
- pgc_flush_pages(pgc_uts.cache, 1000);
- pgc_evict_pages(pgc_uts.cache, 0, 0);
- }
- return ptr;
-}
-
-static void unittest_stress_test_save_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused) {
- // netdata_log_info("SAVE %zu pages", entries);
- if(!pgc_uts.stop) {
- usec_t t = pgc_uts.time_per_flush_ut;
-
- if(t > 0) {
- struct timespec work_duration = {
- .tv_sec = t / USEC_PER_SEC,
- .tv_nsec = (long) ((t % USEC_PER_SEC) * NSEC_PER_USEC)
- };
-
- nanosleep(&work_duration, NULL);
- }
- }
-}
-
-void unittest_stress_test(void) {
- pgc_uts.cache = pgc_create(pgc_uts.cache_size * 1024 * 1024,
- unittest_free_clean_page_callback,
- 64, unittest_stress_test_save_dirty_page_callback,
- 1000, 10000, 1,
- pgc_uts.options, pgc_uts.partitions, 0);
-
- pgc_uts.metrics = callocz(pgc_uts.clean_metrics + pgc_uts.hot_metrics, sizeof(PGC_PAGE *));
-
- pthread_t service_thread;
- netdata_thread_create(&service_thread, "SERVICE",
- NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
- unittest_stress_test_service, NULL);
-
- pthread_t collect_threads[pgc_uts.collect_threads];
- size_t collect_thread_ids[pgc_uts.collect_threads];
- for(size_t i = 0; i < pgc_uts.collect_threads ;i++) {
- collect_thread_ids[i] = i;
- char buffer[100 + 1];
- snprintfz(buffer, sizeof(buffer) - 1, "COLLECT_%zu", i);
- netdata_thread_create(&collect_threads[i], buffer,
- NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
- unittest_stress_test_collector, &collect_thread_ids[i]);
- }
-
- pthread_t queries_threads[pgc_uts.query_threads];
- size_t query_thread_ids[pgc_uts.query_threads];
- pgc_uts.random_data = callocz(pgc_uts.query_threads, sizeof(struct random_data));
- for(size_t i = 0; i < pgc_uts.query_threads ;i++) {
- query_thread_ids[i] = i;
- char buffer[100 + 1];
- snprintfz(buffer, sizeof(buffer) - 1, "QUERY_%zu", i);
- initstate_r(1, pgc_uts.rand_statebufs, 1024, &pgc_uts.random_data[i]);
- netdata_thread_create(&queries_threads[i], buffer,
- NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
- unittest_stress_test_queries, &query_thread_ids[i]);
- }
-
- heartbeat_t hb;
- heartbeat_init(&hb);
-
- struct {
- size_t entries;
- size_t added;
- size_t deleted;
- size_t referenced;
-
- size_t hot_entries;
- size_t hot_added;
- size_t hot_deleted;
-
- size_t dirty_entries;
- size_t dirty_added;
- size_t dirty_deleted;
-
- size_t clean_entries;
- size_t clean_added;
- size_t clean_deleted;
-
- size_t searches_exact;
- size_t searches_exact_hits;
- size_t searches_closest;
- size_t searches_closest_hits;
-
- size_t collections;
-
- size_t events_cache_under_severe_pressure;
- size_t events_cache_needs_space_90;
- size_t events_flush_critical;
- } stats = {}, old_stats = {};
-
- for(int i = 0; i < 86400 ;i++) {
- heartbeat_next(&hb, 1 * USEC_PER_SEC);
-
- old_stats = stats;
- stats.entries = __atomic_load_n(&pgc_uts.cache->stats.entries, __ATOMIC_RELAXED);
- stats.added = __atomic_load_n(&pgc_uts.cache->stats.added_entries, __ATOMIC_RELAXED);
- stats.deleted = __atomic_load_n(&pgc_uts.cache->stats.removed_entries, __ATOMIC_RELAXED);
- stats.referenced = __atomic_load_n(&pgc_uts.cache->stats.referenced_entries, __ATOMIC_RELAXED);
-
- stats.hot_entries = __atomic_load_n(&pgc_uts.cache->hot.stats->entries, __ATOMIC_RELAXED);
- stats.hot_added = __atomic_load_n(&pgc_uts.cache->hot.stats->added_entries, __ATOMIC_RELAXED);
- stats.hot_deleted = __atomic_load_n(&pgc_uts.cache->hot.stats->removed_entries, __ATOMIC_RELAXED);
-
- stats.dirty_entries = __atomic_load_n(&pgc_uts.cache->dirty.stats->entries, __ATOMIC_RELAXED);
- stats.dirty_added = __atomic_load_n(&pgc_uts.cache->dirty.stats->added_entries, __ATOMIC_RELAXED);
- stats.dirty_deleted = __atomic_load_n(&pgc_uts.cache->dirty.stats->removed_entries, __ATOMIC_RELAXED);
-
- stats.clean_entries = __atomic_load_n(&pgc_uts.cache->clean.stats->entries, __ATOMIC_RELAXED);
- stats.clean_added = __atomic_load_n(&pgc_uts.cache->clean.stats->added_entries, __ATOMIC_RELAXED);
- stats.clean_deleted = __atomic_load_n(&pgc_uts.cache->clean.stats->removed_entries, __ATOMIC_RELAXED);
-
- stats.searches_exact = __atomic_load_n(&pgc_uts.cache->stats.searches_exact, __ATOMIC_RELAXED);
- stats.searches_exact_hits = __atomic_load_n(&pgc_uts.cache->stats.searches_exact_hits, __ATOMIC_RELAXED);
-
- stats.searches_closest = __atomic_load_n(&pgc_uts.cache->stats.searches_closest, __ATOMIC_RELAXED);
- stats.searches_closest_hits = __atomic_load_n(&pgc_uts.cache->stats.searches_closest_hits, __ATOMIC_RELAXED);
-
- stats.events_cache_under_severe_pressure = __atomic_load_n(&pgc_uts.cache->stats.events_cache_under_severe_pressure, __ATOMIC_RELAXED);
- stats.events_cache_needs_space_90 = __atomic_load_n(&pgc_uts.cache->stats.events_cache_needs_space_aggressively, __ATOMIC_RELAXED);
- stats.events_flush_critical = __atomic_load_n(&pgc_uts.cache->stats.events_flush_critical, __ATOMIC_RELAXED);
-
- size_t searches_exact = stats.searches_exact - old_stats.searches_exact;
- size_t searches_closest = stats.searches_closest - old_stats.searches_closest;
-
- size_t hit_exact = stats.searches_exact_hits - old_stats.searches_exact_hits;
- size_t hit_closest = stats.searches_closest_hits - old_stats.searches_closest_hits;
-
- double hit_exact_pc = (searches_exact > 0) ? (double)hit_exact * 100.0 / (double)searches_exact : 0.0;
- double hit_closest_pc = (searches_closest > 0) ? (double)hit_closest * 100.0 / (double)searches_closest : 0.0;
-
-#ifdef PGC_COUNT_POINTS_COLLECTED
- stats.collections = __atomic_load_n(&pgc_uts.cache->stats.points_collected, __ATOMIC_RELAXED);
-#endif
-
- char *cache_status = "N";
- if(stats.events_cache_under_severe_pressure > old_stats.events_cache_under_severe_pressure)
- cache_status = "F";
- else if(stats.events_cache_needs_space_90 > old_stats.events_cache_needs_space_90)
- cache_status = "f";
-
- char *flushing_status = "N";
- if(stats.events_flush_critical > old_stats.events_flush_critical)
- flushing_status = "F";
-
- netdata_log_info("PGS %5zuk +%4zuk/-%4zuk "
- "| RF %5zuk "
- "| HOT %5zuk +%4zuk -%4zuk "
- "| DRT %s %5zuk +%4zuk -%4zuk "
- "| CLN %s %5zuk +%4zuk -%4zuk "
- "| SRCH %4zuk %4zuk, HIT %4.1f%% %4.1f%% "
-#ifdef PGC_COUNT_POINTS_COLLECTED
- "| CLCT %8.4f Mps"
-#endif
- , stats.entries / 1000
- , (stats.added - old_stats.added) / 1000, (stats.deleted - old_stats.deleted) / 1000
- , stats.referenced / 1000
- , stats.hot_entries / 1000, (stats.hot_added - old_stats.hot_added) / 1000, (stats.hot_deleted - old_stats.hot_deleted) / 1000
- , flushing_status
- , stats.dirty_entries / 1000
- , (stats.dirty_added - old_stats.dirty_added) / 1000, (stats.dirty_deleted - old_stats.dirty_deleted) / 1000
- , cache_status
- , stats.clean_entries / 1000
- , (stats.clean_added - old_stats.clean_added) / 1000, (stats.clean_deleted - old_stats.clean_deleted) / 1000
- , searches_exact / 1000, searches_closest / 1000
- , hit_exact_pc, hit_closest_pc
-#ifdef PGC_COUNT_POINTS_COLLECTED
- , (double)(stats.collections - old_stats.collections) / 1000.0 / 1000.0
-#endif
- );
- }
- netdata_log_info("Waiting for threads to stop...");
- __atomic_store_n(&pgc_uts.stop, true, __ATOMIC_RELAXED);
-
- netdata_thread_join(service_thread, NULL);
-
- for(size_t i = 0; i < pgc_uts.collect_threads ;i++)
- netdata_thread_join(collect_threads[i],NULL);
-
- for(size_t i = 0; i < pgc_uts.query_threads ;i++)
- netdata_thread_join(queries_threads[i],NULL);
-
- pgc_destroy(pgc_uts.cache);
-
- freez(pgc_uts.metrics);
- freez(pgc_uts.random_data);
-}
-#endif
-
-int pgc_unittest(void) {
- PGC *cache = pgc_create("test",
- 32 * 1024 * 1024, unittest_free_clean_page_callback,
- 64, NULL, unittest_save_dirty_page_callback,
- 10, 10, 1000, 10,
- PGC_OPTIONS_DEFAULT, 1, 11);
-
- // FIXME - unit tests
- // - add clean page
- // - add clean page again (should not add it)
- // - release page (should decrement counters)
- // - add hot page
- // - add hot page again (should not add it)
- // - turn hot page to dirty, with and without a reference counter to it
- // - dirty pages are saved once there are enough of them
- // - find page exact
- // - find page (should return last)
- // - find page (should return next)
- // - page cache full (should evict)
- // - on destroy, turn hot pages to dirty and save them
-
- PGC_PAGE *page1 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
- .section = 1,
- .metric_id = 10,
- .start_time_s = 100,
- .end_time_s = 1000,
- .size = 4096,
- .data = NULL,
- .hot = false,
- .custom_data = (uint8_t *)"0123456789",
- }, NULL);
-
- if(strcmp(pgc_page_custom_data(cache, page1), "0123456789") != 0)
- fatal("custom data do not work");
-
- memcpy(pgc_page_custom_data(cache, page1), "ABCDEFGHIJ", 11);
- if(strcmp(pgc_page_custom_data(cache, page1), "ABCDEFGHIJ") != 0)
- fatal("custom data do not work");
-
- pgc_page_release(cache, page1);
-
- PGC_PAGE *page2 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
- .section = 2,
- .metric_id = 10,
- .start_time_s = 1001,
- .end_time_s = 2000,
- .size = 4096,
- .data = NULL,
- .hot = true,
- }, NULL);
-
- pgc_page_hot_set_end_time_s(cache, page2, 2001);
- pgc_page_hot_to_dirty_and_release(cache, page2);
-
- PGC_PAGE *page3 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){
- .section = 3,
- .metric_id = 10,
- .start_time_s = 1001,
- .end_time_s = 2000,
- .size = 4096,
- .data = NULL,
- .hot = true,
- }, NULL);
-
- pgc_page_hot_set_end_time_s(cache, page3, 2001);
- pgc_page_hot_to_dirty_and_release(cache, page3);
-
- pgc_destroy(cache);
-
-#ifdef PGC_STRESS_TEST
- unittest_stress_test();
-#endif
-
- return 0;
-}
diff --git a/database/engine/cache.h b/database/engine/cache.h
deleted file mode 100644
index 7cd7c0636..000000000
--- a/database/engine/cache.h
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-#ifndef DBENGINE_CACHE_H
-#define DBENGINE_CACHE_H
-
-#include "../rrd.h"
-
-// CACHE COMPILE TIME CONFIGURATION
-// #define PGC_COUNT_POINTS_COLLECTED 1
-
-typedef struct pgc PGC;
-typedef struct pgc_page PGC_PAGE;
-#define PGC_NAME_MAX 23
-
-typedef enum __attribute__ ((__packed__)) {
- PGC_OPTIONS_NONE = 0,
- PGC_OPTIONS_EVICT_PAGES_INLINE = (1 << 0),
- PGC_OPTIONS_FLUSH_PAGES_INLINE = (1 << 1),
- PGC_OPTIONS_AUTOSCALE = (1 << 2),
-} PGC_OPTIONS;
-
-#define PGC_OPTIONS_DEFAULT (PGC_OPTIONS_EVICT_PAGES_INLINE | PGC_OPTIONS_FLUSH_PAGES_INLINE | PGC_OPTIONS_AUTOSCALE)
-
-typedef struct pgc_entry {
- Word_t section; // the section this belongs to
- Word_t metric_id; // the metric this belongs to
- time_t start_time_s; // the start time of the page
- time_t end_time_s; // the end time of the page
- size_t size; // the size in bytes of the allocation, outside the cache
- void *data; // a pointer to data outside the cache
- uint32_t update_every_s; // the update every of the page
- bool hot; // true if this entry is currently being collected
- uint8_t *custom_data;
-} PGC_ENTRY;
-
-#define PGC_CACHE_LINE_PADDING(x) uint8_t padding##x[64]
-
-struct pgc_queue_statistics {
- size_t entries;
- size_t size;
-
- PGC_CACHE_LINE_PADDING(1);
-
- size_t max_entries;
- size_t max_size;
-
- PGC_CACHE_LINE_PADDING(2);
-
- size_t added_entries;
- size_t added_size;
-
- PGC_CACHE_LINE_PADDING(3);
-
- size_t removed_entries;
- size_t removed_size;
-
- PGC_CACHE_LINE_PADDING(4);
-};
-
-struct pgc_statistics {
- size_t wanted_cache_size;
- size_t current_cache_size;
-
- PGC_CACHE_LINE_PADDING(1);
-
- size_t added_entries;
- size_t added_size;
-
- PGC_CACHE_LINE_PADDING(2);
-
- size_t removed_entries;
- size_t removed_size;
-
- PGC_CACHE_LINE_PADDING(3);
-
- size_t entries; // all the entries (includes clean, dirty, hot)
- size_t size; // all the entries (includes clean, dirty, hot)
-
- size_t evicting_entries;
- size_t evicting_size;
-
- size_t flushing_entries;
- size_t flushing_size;
-
- size_t hot2dirty_entries;
- size_t hot2dirty_size;
-
- PGC_CACHE_LINE_PADDING(4);
-
- size_t acquires;
- PGC_CACHE_LINE_PADDING(4a);
- size_t releases;
- PGC_CACHE_LINE_PADDING(4b);
- size_t acquires_for_deletion;
- PGC_CACHE_LINE_PADDING(4c);
-
- size_t referenced_entries; // all the entries currently referenced
- size_t referenced_size; // all the entries currently referenced
-
- PGC_CACHE_LINE_PADDING(5);
-
- size_t searches_exact;
- size_t searches_exact_hits;
- size_t searches_exact_misses;
-
- PGC_CACHE_LINE_PADDING(6);
-
- size_t searches_closest;
- size_t searches_closest_hits;
- size_t searches_closest_misses;
-
- PGC_CACHE_LINE_PADDING(7);
-
- size_t flushes_completed;
- size_t flushes_completed_size;
- size_t flushes_cancelled;
- size_t flushes_cancelled_size;
-
-#ifdef PGC_COUNT_POINTS_COLLECTED
- PGC_CACHE_LINE_PADDING(8);
- size_t points_collected;
-#endif
-
- PGC_CACHE_LINE_PADDING(9);
-
- size_t insert_spins;
- size_t evict_spins;
- size_t release_spins;
- size_t acquire_spins;
- size_t delete_spins;
- size_t flush_spins;
-
- PGC_CACHE_LINE_PADDING(10);
-
- size_t workers_search;
- size_t workers_add;
- size_t workers_evict;
- size_t workers_flush;
- size_t workers_jv2_flush;
- size_t workers_hot2dirty;
-
- size_t evict_skipped;
- size_t hot_empty_pages_evicted_immediately;
- size_t hot_empty_pages_evicted_later;
-
- PGC_CACHE_LINE_PADDING(11);
-
- // events
- size_t events_cache_under_severe_pressure;
- size_t events_cache_needs_space_aggressively;
- size_t events_flush_critical;
-
- PGC_CACHE_LINE_PADDING(12);
-
- struct {
- PGC_CACHE_LINE_PADDING(0);
- struct pgc_queue_statistics hot;
- PGC_CACHE_LINE_PADDING(1);
- struct pgc_queue_statistics dirty;
- PGC_CACHE_LINE_PADDING(2);
- struct pgc_queue_statistics clean;
- PGC_CACHE_LINE_PADDING(3);
- } queues;
-};
-
-
-typedef void (*free_clean_page_callback)(PGC *cache, PGC_ENTRY entry);
-typedef void (*save_dirty_page_callback)(PGC *cache, PGC_ENTRY *entries_array, PGC_PAGE **pages_array, size_t entries);
-typedef void (*save_dirty_init_callback)(PGC *cache, Word_t section);
-// create a cache
-PGC *pgc_create(const char *name,
- size_t clean_size_bytes, free_clean_page_callback pgc_free_clean_cb,
- size_t max_dirty_pages_per_flush, save_dirty_init_callback pgc_save_init_cb, save_dirty_page_callback pgc_save_dirty_cb,
- size_t max_pages_per_inline_eviction, size_t max_inline_evictors,
- size_t max_skip_pages_per_inline_eviction,
- size_t max_flushes_inline,
- PGC_OPTIONS options, size_t partitions, size_t additional_bytes_per_page);
-
-// destroy the cache
-void pgc_destroy(PGC *cache);
-
-#define PGC_SECTION_ALL ((Word_t)0)
-void pgc_flush_all_hot_and_dirty_pages(PGC *cache, Word_t section);
-
-// add a page to the cache and return a pointer to it
-PGC_PAGE *pgc_page_add_and_acquire(PGC *cache, PGC_ENTRY entry, bool *added);
-
-// get another reference counter on an already referenced page
-PGC_PAGE *pgc_page_dup(PGC *cache, PGC_PAGE *page);
-
-// release a page (all pointers to it are now invalid)
-void pgc_page_release(PGC *cache, PGC_PAGE *page);
-
-// mark a hot page dirty, and release it
-void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page);
-
-// find a page from the cache
-typedef enum {
- PGC_SEARCH_EXACT,
- PGC_SEARCH_CLOSEST,
- PGC_SEARCH_FIRST,
- PGC_SEARCH_NEXT,
- PGC_SEARCH_LAST,
- PGC_SEARCH_PREV,
-} PGC_SEARCH;
-
-PGC_PAGE *pgc_page_get_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method);
-
-// get information from an acquired page
-Word_t pgc_page_section(PGC_PAGE *page);
-Word_t pgc_page_metric(PGC_PAGE *page);
-time_t pgc_page_start_time_s(PGC_PAGE *page);
-time_t pgc_page_end_time_s(PGC_PAGE *page);
-time_t pgc_page_update_every_s(PGC_PAGE *page);
-time_t pgc_page_fix_update_every(PGC_PAGE *page, time_t update_every_s);
-time_t pgc_page_fix_end_time_s(PGC_PAGE *page, time_t end_time_s);
-void *pgc_page_data(PGC_PAGE *page);
-void *pgc_page_custom_data(PGC *cache, PGC_PAGE *page);
-size_t pgc_page_data_size(PGC *cache, PGC_PAGE *page);
-bool pgc_is_page_hot(PGC_PAGE *page);
-bool pgc_is_page_dirty(PGC_PAGE *page);
-bool pgc_is_page_clean(PGC_PAGE *page);
-void pgc_reset_hot_max(PGC *cache);
-size_t pgc_get_current_cache_size(PGC *cache);
-size_t pgc_get_wanted_cache_size(PGC *cache);
-
-// resetting the end time of a hot page
-void pgc_page_hot_set_end_time_s(PGC *cache, PGC_PAGE *page, time_t end_time_s);
-bool pgc_page_to_clean_evict_or_release(PGC *cache, PGC_PAGE *page);
-
-typedef void (*migrate_to_v2_callback)(Word_t section, unsigned datafile_fileno, uint8_t type, Pvoid_t JudyL_metrics, Pvoid_t JudyL_extents_pos, size_t count_of_unique_extents, size_t count_of_unique_metrics, size_t count_of_unique_pages, void *data);
-void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_fileno, uint8_t type, migrate_to_v2_callback cb, void *data);
-void pgc_open_evict_clean_pages_of_datafile(PGC *cache, struct rrdengine_datafile *datafile);
-size_t pgc_count_clean_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr);
-size_t pgc_count_hot_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr);
-
-typedef size_t (*dynamic_target_cache_size_callback)(void);
-void pgc_set_dynamic_target_cache_size_callback(PGC *cache, dynamic_target_cache_size_callback callback);
-
-// return true when there is more work to do
-bool pgc_evict_pages(PGC *cache, size_t max_skip, size_t max_evict);
-bool pgc_flush_pages(PGC *cache, size_t max_flushes);
-
-struct pgc_statistics pgc_get_statistics(PGC *cache);
-size_t pgc_hot_and_dirty_entries(PGC *cache);
-
-struct aral_statistics *pgc_aral_statistics(void);
-size_t pgc_aral_structures(void);
-size_t pgc_aral_overhead(void);
-
-#endif // DBENGINE_CACHE_H
diff --git a/database/engine/datafile.c b/database/engine/datafile.c
deleted file mode 100644
index 7322039cd..000000000
--- a/database/engine/datafile.c
+++ /dev/null
@@ -1,611 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-#include "rrdengine.h"
-
-void datafile_list_insert(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile, bool having_lock)
-{
- if(!having_lock)
- uv_rwlock_wrlock(&ctx->datafiles.rwlock);
-
- DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ctx->datafiles.first, datafile, prev, next);
-
- if(!having_lock)
- uv_rwlock_wrunlock(&ctx->datafiles.rwlock);
-}
-
-void datafile_list_delete_unsafe(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile)
-{
- DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ctx->datafiles.first, datafile, prev, next);
-}
-
-
-static struct rrdengine_datafile *datafile_alloc_and_init(struct rrdengine_instance *ctx, unsigned tier, unsigned fileno)
-{
- fatal_assert(tier == 1);
-
- struct rrdengine_datafile *datafile = callocz(1, sizeof(struct rrdengine_datafile));
-
- datafile->tier = tier;
- datafile->fileno = fileno;
- fatal_assert(0 == uv_rwlock_init(&datafile->extent_rwlock));
- datafile->ctx = ctx;
-
- datafile->users.available = true;
-
- spinlock_init(&datafile->users.spinlock);
- spinlock_init(&datafile->writers.spinlock);
- spinlock_init(&datafile->extent_queries.spinlock);
-
- return datafile;
-}
-
-bool datafile_acquire(struct rrdengine_datafile *df, DATAFILE_ACQUIRE_REASONS reason) {
- bool ret;
-
- spinlock_lock(&df->users.spinlock);
-
- if(df->users.available) {
- ret = true;
- df->users.lockers++;
- df->users.lockers_by_reason[reason]++;
- }
- else
- ret = false;
-
- spinlock_unlock(&df->users.spinlock);
-
- return ret;
-}
-
-void datafile_release(struct rrdengine_datafile *df, DATAFILE_ACQUIRE_REASONS reason) {
- spinlock_lock(&df->users.spinlock);
- if(!df->users.lockers)
- fatal("DBENGINE DATAFILE: cannot release a datafile that is not acquired");
-
- df->users.lockers--;
- df->users.lockers_by_reason[reason]--;
- spinlock_unlock(&df->users.spinlock);
-}
-
-bool datafile_acquire_for_deletion(struct rrdengine_datafile *df) {
- bool can_be_deleted = false;
-
- spinlock_lock(&df->users.spinlock);
- df->users.available = false;
-
- if(!df->users.lockers)
- can_be_deleted = true;
-
- else {
- // there are lockers
-
- // evict any pages referencing this in the open cache
- spinlock_unlock(&df->users.spinlock);
- pgc_open_evict_clean_pages_of_datafile(open_cache, df);
- spinlock_lock(&df->users.spinlock);
-
- if(!df->users.lockers)
- can_be_deleted = true;
-
- else {
- // there are lockers still
-
- // count the number of pages referencing this in the open cache
- spinlock_unlock(&df->users.spinlock);
- usec_t time_to_scan_ut = now_monotonic_usec();
- size_t clean_pages_in_open_cache = pgc_count_clean_pages_having_data_ptr(open_cache, (Word_t)df->ctx, df);
- size_t hot_pages_in_open_cache = pgc_count_hot_pages_having_data_ptr(open_cache, (Word_t)df->ctx, df);
- time_to_scan_ut = now_monotonic_usec() - time_to_scan_ut;
- spinlock_lock(&df->users.spinlock);
-
- if(!df->users.lockers)
- can_be_deleted = true;
-
- else if(!clean_pages_in_open_cache && !hot_pages_in_open_cache) {
- // no pages in the open cache related to this datafile
-
- time_t now_s = now_monotonic_sec();
-
- if(!df->users.time_to_evict) {
- // first time we did the above
- df->users.time_to_evict = now_s + 120;
- internal_error(true, "DBENGINE: datafile %u of tier %d is not used by any open cache pages, "
- "but it has %u lockers (oc:%u, pd:%u), "
- "%zu clean and %zu hot open cache pages "
- "- will be deleted shortly "
- "(scanned open cache in %"PRIu64" usecs)",
- df->fileno, df->ctx->config.tier,
- df->users.lockers,
- df->users.lockers_by_reason[DATAFILE_ACQUIRE_OPEN_CACHE],
- df->users.lockers_by_reason[DATAFILE_ACQUIRE_PAGE_DETAILS],
- clean_pages_in_open_cache,
- hot_pages_in_open_cache,
- time_to_scan_ut);
- }
-
- else if(now_s > df->users.time_to_evict) {
- // time expired, lets remove it
- can_be_deleted = true;
- internal_error(true, "DBENGINE: datafile %u of tier %d is not used by any open cache pages, "
- "but it has %u lockers (oc:%u, pd:%u), "
- "%zu clean and %zu hot open cache pages "
- "- will be deleted now "
- "(scanned open cache in %"PRIu64" usecs)",
- df->fileno, df->ctx->config.tier,
- df->users.lockers,
- df->users.lockers_by_reason[DATAFILE_ACQUIRE_OPEN_CACHE],
- df->users.lockers_by_reason[DATAFILE_ACQUIRE_PAGE_DETAILS],
- clean_pages_in_open_cache,
- hot_pages_in_open_cache,
- time_to_scan_ut);
- }
- }
- else
- internal_error(true, "DBENGINE: datafile %u of tier %d "
- "has %u lockers (oc:%u, pd:%u), "
- "%zu clean and %zu hot open cache pages "
- "(scanned open cache in %"PRIu64" usecs)",
- df->fileno, df->ctx->config.tier,
- df->users.lockers,
- df->users.lockers_by_reason[DATAFILE_ACQUIRE_OPEN_CACHE],
- df->users.lockers_by_reason[DATAFILE_ACQUIRE_PAGE_DETAILS],
- clean_pages_in_open_cache,
- hot_pages_in_open_cache,
- time_to_scan_ut);
- }
- }
- spinlock_unlock(&df->users.spinlock);
-
- return can_be_deleted;
-}
-
-void generate_datafilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen)
-{
- (void) snprintfz(str, maxlen - 1, "%s/" DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION,
- datafile->ctx->config.dbfiles_path, datafile->tier, datafile->fileno);
-}
-
-int close_data_file(struct rrdengine_datafile *datafile)
-{
- struct rrdengine_instance *ctx = datafile->ctx;
- uv_fs_t req;
- int ret;
- char path[RRDENG_PATH_MAX];
-
- generate_datafilepath(datafile, path, sizeof(path));
-
- ret = uv_fs_close(NULL, &req, datafile->file, NULL);
- if (ret < 0) {
- netdata_log_error("DBENGINE: uv_fs_close(%s): %s", path, uv_strerror(ret));
- ctx_fs_error(ctx);
- }
- uv_fs_req_cleanup(&req);
-
- return ret;
-}
-
-int unlink_data_file(struct rrdengine_datafile *datafile)
-{
- struct rrdengine_instance *ctx = datafile->ctx;
- uv_fs_t req;
- int ret;
- char path[RRDENG_PATH_MAX];
-
- generate_datafilepath(datafile, path, sizeof(path));
-
- ret = uv_fs_unlink(NULL, &req, path, NULL);
- if (ret < 0) {
- netdata_log_error("DBENGINE: uv_fs_fsunlink(%s): %s", path, uv_strerror(ret));
- ctx_fs_error(ctx);
- }
- uv_fs_req_cleanup(&req);
-
- __atomic_add_fetch(&ctx->stats.datafile_deletions, 1, __ATOMIC_RELAXED);
-
- return ret;
-}
-
-int destroy_data_file_unsafe(struct rrdengine_datafile *datafile)
-{
- struct rrdengine_instance *ctx = datafile->ctx;
- uv_fs_t req;
- int ret;
- char path[RRDENG_PATH_MAX];
-
- generate_datafilepath(datafile, path, sizeof(path));
-
- ret = uv_fs_ftruncate(NULL, &req, datafile->file, 0, NULL);
- if (ret < 0) {
- netdata_log_error("DBENGINE: uv_fs_ftruncate(%s): %s", path, uv_strerror(ret));
- ctx_fs_error(ctx);
- }
- uv_fs_req_cleanup(&req);
-
- ret = uv_fs_close(NULL, &req, datafile->file, NULL);
- if (ret < 0) {
- netdata_log_error("DBENGINE: uv_fs_close(%s): %s", path, uv_strerror(ret));
- ctx_fs_error(ctx);
- }
- uv_fs_req_cleanup(&req);
-
- ret = uv_fs_unlink(NULL, &req, path, NULL);
- if (ret < 0) {
- netdata_log_error("DBENGINE: uv_fs_fsunlink(%s): %s", path, uv_strerror(ret));
- ctx_fs_error(ctx);
- }
- uv_fs_req_cleanup(&req);
-
- __atomic_add_fetch(&ctx->stats.datafile_deletions, 1, __ATOMIC_RELAXED);
-
- return ret;
-}
-
-int create_data_file(struct rrdengine_datafile *datafile)
-{
- struct rrdengine_instance *ctx = datafile->ctx;
- uv_fs_t req;
- uv_file file;
- int ret, fd;
- struct rrdeng_df_sb *superblock;
- uv_buf_t iov;
- char path[RRDENG_PATH_MAX];
-
- generate_datafilepath(datafile, path, sizeof(path));
- fd = open_file_for_io(path, O_CREAT | O_RDWR | O_TRUNC, &file, use_direct_io);
- if (fd < 0) {
- ctx_fs_error(ctx);
- return fd;
- }
- datafile->file = file;
- __atomic_add_fetch(&ctx->stats.datafile_creations, 1, __ATOMIC_RELAXED);
-
- ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock));
- if (unlikely(ret)) {
- fatal("DBENGINE: posix_memalign:%s", strerror(ret));
- }
- memset(superblock, 0, sizeof(*superblock));
- (void) strncpy(superblock->magic_number, RRDENG_DF_MAGIC, RRDENG_MAGIC_SZ);
- (void) strncpy(superblock->version, RRDENG_DF_VER, RRDENG_VER_SZ);
- superblock->tier = 1;
-
- iov = uv_buf_init((void *)superblock, sizeof(*superblock));
-
- ret = uv_fs_write(NULL, &req, file, &iov, 1, 0, NULL);
- if (ret < 0) {
- fatal_assert(req.result < 0);
- netdata_log_error("DBENGINE: uv_fs_write: %s", uv_strerror(ret));
- ctx_io_error(ctx);
- }
- uv_fs_req_cleanup(&req);
- posix_memfree(superblock);
- if (ret < 0) {
- destroy_data_file_unsafe(datafile);
- return ret;
- }
-
- datafile->pos = sizeof(*superblock);
- ctx_io_write_op_bytes(ctx, sizeof(*superblock));
-
- return 0;
-}
-
-static int check_data_file_superblock(uv_file file)
-{
- int ret;
- struct rrdeng_df_sb *superblock;
- uv_buf_t iov;
- uv_fs_t req;
-
- ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock));
- if (unlikely(ret)) {
- fatal("DBENGINE: posix_memalign:%s", strerror(ret));
- }
- iov = uv_buf_init((void *)superblock, sizeof(*superblock));
-
- ret = uv_fs_read(NULL, &req, file, &iov, 1, 0, NULL);
- if (ret < 0) {
- netdata_log_error("DBENGINE: uv_fs_read: %s", uv_strerror(ret));
- uv_fs_req_cleanup(&req);
- goto error;
- }
- fatal_assert(req.result >= 0);
- uv_fs_req_cleanup(&req);
-
- if (strncmp(superblock->magic_number, RRDENG_DF_MAGIC, RRDENG_MAGIC_SZ) ||
- strncmp(superblock->version, RRDENG_DF_VER, RRDENG_VER_SZ) ||
- superblock->tier != 1) {
- netdata_log_error("DBENGINE: file has invalid superblock.");
- ret = UV_EINVAL;
- } else {
- ret = 0;
- }
- error:
- posix_memfree(superblock);
- return ret;
-}
-
-static int load_data_file(struct rrdengine_datafile *datafile)
-{
- struct rrdengine_instance *ctx = datafile->ctx;
- uv_fs_t req;
- uv_file file;
- int ret, fd, error;
- uint64_t file_size;
- char path[RRDENG_PATH_MAX];
-
- generate_datafilepath(datafile, path, sizeof(path));
- fd = open_file_for_io(path, O_RDWR, &file, use_direct_io);
- if (fd < 0) {
- ctx_fs_error(ctx);
- return fd;
- }
-
- nd_log_daemon(NDLP_DEBUG, "DBENGINE: initializing data file \"%s\".", path);
-
- ret = check_file_properties(file, &file_size, sizeof(struct rrdeng_df_sb));
- if (ret)
- goto error;
- file_size = ALIGN_BYTES_CEILING(file_size);
-
- ret = check_data_file_superblock(file);
- if (ret)
- goto error;
-
- ctx_io_read_op_bytes(ctx, sizeof(struct rrdeng_df_sb));
-
- datafile->file = file;
- datafile->pos = file_size;
-
- nd_log_daemon(NDLP_DEBUG, "DBENGINE: data file \"%s\" initialized (size:%" PRIu64 ").", path, file_size);
-
- return 0;
-
- error:
- error = ret;
- ret = uv_fs_close(NULL, &req, file, NULL);
- if (ret < 0) {
- netdata_log_error("DBENGINE: uv_fs_close(%s): %s", path, uv_strerror(ret));
- ctx_fs_error(ctx);
- }
- uv_fs_req_cleanup(&req);
- return error;
-}
-
-static int scan_data_files_cmp(const void *a, const void *b)
-{
- struct rrdengine_datafile *file1, *file2;
- char path1[RRDENG_PATH_MAX], path2[RRDENG_PATH_MAX];
-
- file1 = *(struct rrdengine_datafile **)a;
- file2 = *(struct rrdengine_datafile **)b;
- generate_datafilepath(file1, path1, sizeof(path1));
- generate_datafilepath(file2, path2, sizeof(path2));
- return strcmp(path1, path2);
-}
-
-/* Returns number of datafiles that were loaded or < 0 on error */
-static int scan_data_files(struct rrdengine_instance *ctx)
-{
- int ret, matched_files, failed_to_load, i;
- unsigned tier, no;
- uv_fs_t req;
- uv_dirent_t dent;
- struct rrdengine_datafile **datafiles, *datafile;
- struct rrdengine_journalfile *journalfile;
-
- ret = uv_fs_scandir(NULL, &req, ctx->config.dbfiles_path, 0, NULL);
- if (ret < 0) {
- fatal_assert(req.result < 0);
- uv_fs_req_cleanup(&req);
- netdata_log_error("DBENGINE: uv_fs_scandir(%s): %s", ctx->config.dbfiles_path, uv_strerror(ret));
- ctx_fs_error(ctx);
- return ret;
- }
- netdata_log_info("DBENGINE: found %d files in path %s", ret, ctx->config.dbfiles_path);
-
- datafiles = callocz(MIN(ret, MAX_DATAFILES), sizeof(*datafiles));
- for (matched_files = 0 ; UV_EOF != uv_fs_scandir_next(&req, &dent) && matched_files < MAX_DATAFILES ; ) {
- ret = sscanf(dent.name, DATAFILE_PREFIX RRDENG_FILE_NUMBER_SCAN_TMPL DATAFILE_EXTENSION, &tier, &no);
- if (2 == ret) {
- datafile = datafile_alloc_and_init(ctx, tier, no);
- datafiles[matched_files++] = datafile;
- }
- }
- uv_fs_req_cleanup(&req);
-
- if (0 == matched_files) {
- freez(datafiles);
- return 0;
- }
-
- if (matched_files == MAX_DATAFILES)
- netdata_log_error("DBENGINE: warning: hit maximum database engine file limit of %d files", MAX_DATAFILES);
-
- qsort(datafiles, matched_files, sizeof(*datafiles), scan_data_files_cmp);
-
- ctx->atomic.last_fileno = datafiles[matched_files - 1]->fileno;
-
- netdata_log_info("DBENGINE: loading %d data/journal of tier %d...", matched_files, ctx->config.tier);
- for (failed_to_load = 0, i = 0 ; i < matched_files ; ++i) {
- uint8_t must_delete_pair = 0;
-
- datafile = datafiles[i];
- ret = load_data_file(datafile);
- if (0 != ret)
- must_delete_pair = 1;
-
- journalfile = journalfile_alloc_and_init(datafile);
- ret = journalfile_load(ctx, journalfile, datafile);
- if (0 != ret) {
- if (!must_delete_pair) /* If datafile is still open close it */
- close_data_file(datafile);
- must_delete_pair = 1;
- }
-
- if (must_delete_pair) {
- char path[RRDENG_PATH_MAX];
-
- netdata_log_error("DBENGINE: deleting invalid data and journal file pair.");
- ret = journalfile_unlink(journalfile);
- if (!ret) {
- journalfile_v1_generate_path(datafile, path, sizeof(path));
- netdata_log_info("DBENGINE: deleted journal file \"%s\".", path);
- }
- ret = unlink_data_file(datafile);
- if (!ret) {
- generate_datafilepath(datafile, path, sizeof(path));
- netdata_log_info("DBENGINE: deleted data file \"%s\".", path);
- }
- freez(journalfile);
- freez(datafile);
- ++failed_to_load;
- continue;
- }
-
- ctx_current_disk_space_increase(ctx, datafile->pos + journalfile->unsafe.pos);
- datafile_list_insert(ctx, datafile, false);
- }
-
- matched_files -= failed_to_load;
- freez(datafiles);
-
- return matched_files;
-}
-
-/* Creates a datafile and a journalfile pair */
-int create_new_datafile_pair(struct rrdengine_instance *ctx, bool having_lock)
-{
- __atomic_add_fetch(&rrdeng_cache_efficiency_stats.datafile_creation_started, 1, __ATOMIC_RELAXED);
-
- struct rrdengine_datafile *datafile;
- struct rrdengine_journalfile *journalfile;
- unsigned fileno = ctx_last_fileno_get(ctx) + 1;
- int ret;
- char path[RRDENG_PATH_MAX];
-
- nd_log(NDLS_DAEMON, NDLP_DEBUG,
- "DBENGINE: creating new data and journal files in path %s",
- ctx->config.dbfiles_path);
-
- datafile = datafile_alloc_and_init(ctx, 1, fileno);
- ret = create_data_file(datafile);
- if(ret)
- goto error_after_datafile;
-
- generate_datafilepath(datafile, path, sizeof(path));
- nd_log(NDLS_DAEMON, NDLP_INFO,
- "DBENGINE: created data file \"%s\".", path);
-
- journalfile = journalfile_alloc_and_init(datafile);
- ret = journalfile_create(journalfile, datafile);
- if (ret)
- goto error_after_journalfile;
-
- journalfile_v1_generate_path(datafile, path, sizeof(path));
- nd_log(NDLS_DAEMON, NDLP_INFO,
- "DBENGINE: created journal file \"%s\".", path);
-
- ctx_current_disk_space_increase(ctx, datafile->pos + journalfile->unsafe.pos);
- datafile_list_insert(ctx, datafile, having_lock);
- ctx_last_fileno_increment(ctx);
-
- return 0;
-
-error_after_journalfile:
- destroy_data_file_unsafe(datafile);
- freez(journalfile);
-
-error_after_datafile:
- freez(datafile);
- return ret;
-}
-
-/* Page cache must already be initialized.
- * Return 0 on success.
- */
-int init_data_files(struct rrdengine_instance *ctx)
-{
- int ret;
-
- fatal_assert(0 == uv_rwlock_init(&ctx->datafiles.rwlock));
- ret = scan_data_files(ctx);
- if (ret < 0) {
- netdata_log_error("DBENGINE: failed to scan path \"%s\".", ctx->config.dbfiles_path);
- return ret;
- } else if (0 == ret) {
- netdata_log_info("DBENGINE: data files not found, creating in path \"%s\".", ctx->config.dbfiles_path);
- ctx->atomic.last_fileno = 0;
- ret = create_new_datafile_pair(ctx, false);
- if (ret) {
- netdata_log_error("DBENGINE: failed to create data and journal files in path \"%s\".", ctx->config.dbfiles_path);
- return ret;
- }
- }
- else {
- if (ctx->loading.create_new_datafile_pair)
- create_new_datafile_pair(ctx, false);
-
- while(rrdeng_ctx_exceeded_disk_quota(ctx))
- datafile_delete(ctx, ctx->datafiles.first, false, false);
- }
-
- pgc_reset_hot_max(open_cache);
- ctx->loading.create_new_datafile_pair = false;
- return 0;
-}
-
-void finalize_data_files(struct rrdengine_instance *ctx)
-{
- bool logged = false;
-
- logged = false;
- while(__atomic_load_n(&ctx->atomic.extents_currently_being_flushed, __ATOMIC_RELAXED)) {
- if(!logged) {
- netdata_log_info("Waiting for inflight flush to finish on tier %d...", ctx->config.tier);
- logged = true;
- }
- sleep_usec(100 * USEC_PER_MS);
- }
-
- do {
- struct rrdengine_datafile *datafile = ctx->datafiles.first;
- struct rrdengine_journalfile *journalfile = datafile->journalfile;
-
- logged = false;
- size_t iterations = 100;
- while(!datafile_acquire_for_deletion(datafile) && datafile != ctx->datafiles.first->prev && --iterations > 0) {
- if(!logged) {
- netdata_log_info("Waiting to acquire data file %u of tier %d to close it...", datafile->fileno, ctx->config.tier);
- logged = true;
- }
- sleep_usec(100 * USEC_PER_MS);
- }
-
- logged = false;
- bool available = false;
- do {
- uv_rwlock_wrlock(&ctx->datafiles.rwlock);
- spinlock_lock(&datafile->writers.spinlock);
- available = (datafile->writers.running || datafile->writers.flushed_to_open_running) ? false : true;
-
- if(!available) {
- spinlock_unlock(&datafile->writers.spinlock);
- uv_rwlock_wrunlock(&ctx->datafiles.rwlock);
- if(!logged) {
- netdata_log_info("Waiting for writers to data file %u of tier %d to finish...", datafile->fileno, ctx->config.tier);
- logged = true;
- }
- sleep_usec(100 * USEC_PER_MS);
- }
- } while(!available);
-
- journalfile_close(journalfile, datafile);
- close_data_file(datafile);
- datafile_list_delete_unsafe(ctx, datafile);
- spinlock_unlock(&datafile->writers.spinlock);
- uv_rwlock_wrunlock(&ctx->datafiles.rwlock);
-
- freez(journalfile);
- freez(datafile);
-
- } while(ctx->datafiles.first);
-}
diff --git a/database/engine/datafile.ksy b/database/engine/datafile.ksy
deleted file mode 100644
index 28d4b3935..000000000
--- a/database/engine/datafile.ksy
+++ /dev/null
@@ -1,74 +0,0 @@
-meta:
- id: netdata_datafile
- endian: le
-
-seq:
- - id: hdr
- type: header
- size: 4096
- - id: extents
- type: extent
- repeat: eos
-
-types:
- header:
- seq:
- - id: magic
- contents: "netdata-data-file"
- - id: reserved
- size: 15
- - id: version
- contents: "1.0"
- - id: reserved1
- size: 13
- - id: tier
- type: u1
- extent_page_descr:
- seq:
- - id: type
- type: u1
- enum: page_type
- - id: uuid
- size: 16
- - id: page_len
- type: u4
- - id: start_time_ut
- type: u8
- - id: end_time_ut
- type: u8
- enums:
- page_type:
- 0: metrics
- 1: tier
- extent_header:
- seq:
- - id: payload_length
- type: u4
- - id: compression_algorithm
- type: u1
- enum: compression_algos
- - id: number_of_pages
- type: u1
- - id: page_descriptors
- type: extent_page_descr
- repeat: expr
- repeat-expr: number_of_pages
- enums:
- compression_algos:
- 0: rrd_no_compression
- 1: rrd_lz4
- extent_trailer:
- seq:
- - id: crc32_checksum
- type: u4
- extent:
- seq:
- - id: header
- type: extent_header
- - id: payload
- size: header.payload_length
- - id: trailer
- type: extent_trailer
- - id: padding
- size: (((_io.pos + 4095) / 4096) * 4096) - _io.pos
- # the extent size is made to always be a multiple of 4096
diff --git a/database/engine/journalfile_v2.ksy.in b/database/engine/journalfile_v2.ksy.in
deleted file mode 100644
index 6a656bc45..000000000
--- a/database/engine/journalfile_v2.ksy.in
+++ /dev/null
@@ -1,150 +0,0 @@
-meta:
- id: journalfile_v2`'ifdef(`VIRT_MEMBERS',`_virtmemb')
- endian: le
- application: netdata
- file-extension: njfv2
- license: GPL-3.0-or-later
-
-seq:
- - id: journal_v2_header
- type: journal_v2_header
- size: 4096
- - id: extent_list
- type: journal_v2_extent_list
- repeat: expr
- repeat-expr: journal_v2_header.extent_count
- - id: extent_trailer
- type: journal_v2_block_trailer
- - id: metric_list
- type: journal_v2_metric_list
- repeat: expr
- repeat-expr: journal_v2_header.metric_count
- - id: metric_trailer
- type: journal_v2_block_trailer
- - id: page_blocs
- type: journal_v2_page_block
- repeat: expr
- repeat-expr: _root.journal_v2_header.metric_count
- - id: padding
- size: _root._io.size - _root._io.pos - 4
- - id: journal_file_trailer
- type: journal_v2_block_trailer
-
-types:
- journal_v2_metric_list:
- seq:
- - id: uuid
- size: 16
- - id: entries
- type: u4
- - id: page_offset
- type: u4
- - id: delta_start_s
- type: u4
- - id: delta_end_s
- type: u4
-ifdef(`VIRT_MEMBERS',
-` instances:
- page_block:
- type: journal_v2_page_block
- io: _root._io
- pos: page_offset
-')dnl
- journal_v2_page_hdr:
- seq:
- - id: crc
- type: u4
- - id: uuid_offset
- type: u4
- - id: entries
- type: u4
- - id: uuid
- size: 16
- journal_v2_page_list:
- seq:
- - id: delta_start_s
- type: u4
- - id: delta_end_s
- type: u4
- - id: extent_idx
- type: u4
- - id: update_every_s
- type: u4
- - id: page_len
- type: u2
- - id: type
- type: u1
- - id: reserved
- type: u1
-ifdef(`VIRT_MEMBERS',
-` instances:
- extent:
- io: _root._io
- type: journal_v2_extent_list
- pos: _root.journal_v2_header.extent_offset + (extent_idx * 16)
-')dnl
- journal_v2_header:
- seq:
- - id: magic
- contents: [ 0x19, 0x10, 0x22, 0x01 ] #0x01221019
- - id: reserved
- type: u4
- - id: start_time_ut
- type: u8
- - id: end_time_ut
- type: u8
- - id: extent_count
- type: u4
- - id: extent_offset
- type: u4
- - id: metric_count
- type: u4
- - id: metric_offset
- type: u4
- - id: page_count
- type: u4
- - id: page_offset
- type: u4
- - id: extent_trailer_offset
- type: u4
- - id: metric_trailer_offset
- type: u4
- - id: original_file_size
- type: u4
- - id: total_file_size
- type: u4
- - id: data
- type: u8
-ifdef(`VIRT_MEMBERS',
-` instances:
- trailer:
- io: _root._io
- type: journal_v2_block_trailer
- pos: _root._io.size - 4
-')dnl
- journal_v2_block_trailer:
- seq:
- - id: checksum
- type: u4
- journal_v2_extent_list:
- seq:
- - id: datafile_offset
- type: u8
- - id: datafile_size
- type: u4
- - id: file_idx
- type: u2
- - id: page_cnt
- type: u1
- - id: padding
- type: u1
- journal_v2_page_block:
- seq:
- - id: hdr
- type: journal_v2_page_hdr
- - id: page_list
- type: journal_v2_page_list
- repeat: expr
- repeat-expr: hdr.entries
- - id: block_trailer
- type: journal_v2_block_trailer
diff --git a/database/engine/metric.c b/database/engine/metric.c
deleted file mode 100644
index 2e132612e..000000000
--- a/database/engine/metric.c
+++ /dev/null
@@ -1,873 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-#include "metric.h"
-
-typedef int32_t REFCOUNT;
-#define REFCOUNT_DELETING (-100)
-
-struct metric {
- uuid_t uuid; // never changes
- Word_t section; // never changes
-
- time_t first_time_s; // the timestamp of the oldest point in the database
- time_t latest_time_s_clean; // the timestamp of the newest point in the database
- time_t latest_time_s_hot; // the timestamp of the latest point that has been collected (not yet stored)
- uint32_t latest_update_every_s; // the latest data collection frequency
- pid_t writer;
- uint8_t partition;
- REFCOUNT refcount;
-
- // THIS IS allocated with malloc()
- // YOU HAVE TO INITIALIZE IT YOURSELF !
-};
-
-#define set_metric_field_with_condition(field, value, condition) ({ \
- typeof(field) _current = __atomic_load_n(&(field), __ATOMIC_RELAXED); \
- typeof(field) _wanted = value; \
- bool did_it = true; \
- \
- do { \
- if((condition) && (_current != _wanted)) { \
- ; \
- } \
- else { \
- did_it = false; \
- break; \
- } \
- } while(!__atomic_compare_exchange_n(&(field), &_current, _wanted, \
- false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)); \
- \
- did_it; \
-})
-
-static struct aral_statistics mrg_aral_statistics;
-
-struct mrg {
- size_t partitions;
-
- struct mrg_partition {
- ARAL *aral; // not protected by our spinlock - it has its own
-
- RW_SPINLOCK rw_spinlock;
- Pvoid_t uuid_judy; // JudyHS: each UUID has a JudyL of sections (tiers)
-
- struct mrg_statistics stats;
- } index[];
-};
-
-static inline void MRG_STATS_DUPLICATE_ADD(MRG *mrg, size_t partition) {
- mrg->index[partition].stats.additions_duplicate++;
-}
-
-static inline void MRG_STATS_ADDED_METRIC(MRG *mrg, size_t partition) {
- mrg->index[partition].stats.entries++;
- mrg->index[partition].stats.additions++;
- mrg->index[partition].stats.size += sizeof(METRIC);
-}
-
-static inline void MRG_STATS_DELETED_METRIC(MRG *mrg, size_t partition) {
- mrg->index[partition].stats.entries--;
- mrg->index[partition].stats.size -= sizeof(METRIC);
- mrg->index[partition].stats.deletions++;
-}
-
-static inline void MRG_STATS_SEARCH_HIT(MRG *mrg, size_t partition) {
- __atomic_add_fetch(&mrg->index[partition].stats.search_hits, 1, __ATOMIC_RELAXED);
-}
-
-static inline void MRG_STATS_SEARCH_MISS(MRG *mrg, size_t partition) {
- __atomic_add_fetch(&mrg->index[partition].stats.search_misses, 1, __ATOMIC_RELAXED);
-}
-
-static inline void MRG_STATS_DELETE_MISS(MRG *mrg, size_t partition) {
- mrg->index[partition].stats.delete_misses++;
-}
-
-#define mrg_index_read_lock(mrg, partition) rw_spinlock_read_lock(&(mrg)->index[partition].rw_spinlock)
-#define mrg_index_read_unlock(mrg, partition) rw_spinlock_read_unlock(&(mrg)->index[partition].rw_spinlock)
-#define mrg_index_write_lock(mrg, partition) rw_spinlock_write_lock(&(mrg)->index[partition].rw_spinlock)
-#define mrg_index_write_unlock(mrg, partition) rw_spinlock_write_unlock(&(mrg)->index[partition].rw_spinlock)
-
-static inline void mrg_stats_size_judyl_change(MRG *mrg, size_t mem_before_judyl, size_t mem_after_judyl, size_t partition) {
- if(mem_after_judyl > mem_before_judyl)
- __atomic_add_fetch(&mrg->index[partition].stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
- else if(mem_after_judyl < mem_before_judyl)
- __atomic_sub_fetch(&mrg->index[partition].stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED);
-}
-
-static inline void mrg_stats_size_judyhs_added_uuid(MRG *mrg, size_t partition) {
- __atomic_add_fetch(&mrg->index[partition].stats.size, JUDYHS_INDEX_SIZE_ESTIMATE(sizeof(uuid_t)), __ATOMIC_RELAXED);
-}
-
-static inline void mrg_stats_size_judyhs_removed_uuid(MRG *mrg, size_t partition) {
- __atomic_sub_fetch(&mrg->index[partition].stats.size, JUDYHS_INDEX_SIZE_ESTIMATE(sizeof(uuid_t)), __ATOMIC_RELAXED);
-}
-
-static inline size_t uuid_partition(MRG *mrg __maybe_unused, uuid_t *uuid) {
- uint8_t *u = (uint8_t *)uuid;
- size_t *n = (size_t *)&u[UUID_SZ - sizeof(size_t)];
- return *n % mrg->partitions;
-}
-
-static inline time_t mrg_metric_get_first_time_s_smart(MRG *mrg __maybe_unused, METRIC *metric) {
- time_t first_time_s = __atomic_load_n(&metric->first_time_s, __ATOMIC_RELAXED);
-
- if(first_time_s <= 0) {
- first_time_s = __atomic_load_n(&metric->latest_time_s_clean, __ATOMIC_RELAXED);
- if(first_time_s <= 0)
- first_time_s = __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED);
-
- if(first_time_s <= 0)
- first_time_s = 0;
- else
- __atomic_store_n(&metric->first_time_s, first_time_s, __ATOMIC_RELAXED);
- }
-
- return first_time_s;
-}
-
-static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
- size_t partition = metric->partition;
- REFCOUNT expected = __atomic_load_n(&metric->refcount, __ATOMIC_RELAXED);
- REFCOUNT refcount;
-
- do {
- if(expected < 0)
- fatal("METRIC: refcount is %d (negative) during acquire", metric->refcount);
-
- refcount = expected + 1;
- } while(!__atomic_compare_exchange_n(&metric->refcount, &expected, refcount, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
-
- if(refcount == 1)
- __atomic_add_fetch(&mrg->index[partition].stats.entries_referenced, 1, __ATOMIC_RELAXED);
-
- __atomic_add_fetch(&mrg->index[partition].stats.current_references, 1, __ATOMIC_RELAXED);
-
- return refcount;
-}
-
-static inline bool metric_release_and_can_be_deleted(MRG *mrg __maybe_unused, METRIC *metric) {
- size_t partition = metric->partition;
- REFCOUNT expected = __atomic_load_n(&metric->refcount, __ATOMIC_RELAXED);
- REFCOUNT refcount;
-
- do {
- if(expected <= 0)
- fatal("METRIC: refcount is %d (zero or negative) during release", metric->refcount);
-
- refcount = expected - 1;
- } while(!__atomic_compare_exchange_n(&metric->refcount, &expected, refcount, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
-
- if(unlikely(!refcount))
- __atomic_sub_fetch(&mrg->index[partition].stats.entries_referenced, 1, __ATOMIC_RELAXED);
-
- __atomic_sub_fetch(&mrg->index[partition].stats.current_references, 1, __ATOMIC_RELAXED);
-
- time_t first, last, ue;
- mrg_metric_get_retention(mrg, metric, &first, &last, &ue);
- return (!first || !last || first > last);
-}
-
-static inline METRIC *metric_add_and_acquire(MRG *mrg, MRG_ENTRY *entry, bool *ret) {
- size_t partition = uuid_partition(mrg, entry->uuid);
-
- METRIC *allocation = aral_mallocz(mrg->index[partition].aral);
-
- mrg_index_write_lock(mrg, partition);
-
- size_t mem_before_judyl, mem_after_judyl;
-
- Pvoid_t *sections_judy_pptr = JudyHSIns(&mrg->index[partition].uuid_judy, entry->uuid, sizeof(uuid_t), PJE0);
- if(unlikely(!sections_judy_pptr || sections_judy_pptr == PJERR))
- fatal("DBENGINE METRIC: corrupted UUIDs JudyHS array");
-
- if(unlikely(!*sections_judy_pptr))
- mrg_stats_size_judyhs_added_uuid(mrg, partition);
-
- mem_before_judyl = JudyLMemUsed(*sections_judy_pptr);
- Pvoid_t *PValue = JudyLIns(sections_judy_pptr, entry->section, PJE0);
- mem_after_judyl = JudyLMemUsed(*sections_judy_pptr);
- mrg_stats_size_judyl_change(mrg, mem_before_judyl, mem_after_judyl, partition);
-
- if(unlikely(!PValue || PValue == PJERR))
- fatal("DBENGINE METRIC: corrupted section JudyL array");
-
- if(unlikely(*PValue != NULL)) {
- METRIC *metric = *PValue;
-
- metric_acquire(mrg, metric);
-
- MRG_STATS_DUPLICATE_ADD(mrg, partition);
-
- mrg_index_write_unlock(mrg, partition);
-
- if(ret)
- *ret = false;
-
- aral_freez(mrg->index[partition].aral, allocation);
-
- return metric;
- }
-
- METRIC *metric = allocation;
- uuid_copy(metric->uuid, *entry->uuid);
- metric->section = entry->section;
- metric->first_time_s = MAX(0, entry->first_time_s);
- metric->latest_time_s_clean = MAX(0, entry->last_time_s);
- metric->latest_time_s_hot = 0;
- metric->latest_update_every_s = entry->latest_update_every_s;
- metric->writer = 0;
- metric->refcount = 0;
- metric->partition = partition;
- metric_acquire(mrg, metric);
- *PValue = metric;
-
- MRG_STATS_ADDED_METRIC(mrg, partition);
-
- mrg_index_write_unlock(mrg, partition);
-
- if(ret)
- *ret = true;
-
- return metric;
-}
-
-static inline METRIC *metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t section) {
- size_t partition = uuid_partition(mrg, uuid);
-
- mrg_index_read_lock(mrg, partition);
-
- Pvoid_t *sections_judy_pptr = JudyHSGet(mrg->index[partition].uuid_judy, uuid, sizeof(uuid_t));
- if(unlikely(!sections_judy_pptr)) {
- mrg_index_read_unlock(mrg, partition);
- MRG_STATS_SEARCH_MISS(mrg, partition);
- return NULL;
- }
-
- Pvoid_t *PValue = JudyLGet(*sections_judy_pptr, section, PJE0);
- if(unlikely(!PValue)) {
- mrg_index_read_unlock(mrg, partition);
- MRG_STATS_SEARCH_MISS(mrg, partition);
- return NULL;
- }
-
- METRIC *metric = *PValue;
-
- metric_acquire(mrg, metric);
-
- mrg_index_read_unlock(mrg, partition);
-
- MRG_STATS_SEARCH_HIT(mrg, partition);
- return metric;
-}
-
-static inline bool acquired_metric_del(MRG *mrg, METRIC *metric) {
- size_t partition = metric->partition;
-
- size_t mem_before_judyl, mem_after_judyl;
-
- mrg_index_write_lock(mrg, partition);
-
- if(!metric_release_and_can_be_deleted(mrg, metric)) {
- mrg->index[partition].stats.delete_having_retention_or_referenced++;
- mrg_index_write_unlock(mrg, partition);
- return false;
- }
-
- Pvoid_t *sections_judy_pptr = JudyHSGet(mrg->index[partition].uuid_judy, &metric->uuid, sizeof(uuid_t));
- if(unlikely(!sections_judy_pptr || !*sections_judy_pptr)) {
- MRG_STATS_DELETE_MISS(mrg, partition);
- mrg_index_write_unlock(mrg, partition);
- return false;
- }
-
- mem_before_judyl = JudyLMemUsed(*sections_judy_pptr);
- int rc = JudyLDel(sections_judy_pptr, metric->section, PJE0);
- mem_after_judyl = JudyLMemUsed(*sections_judy_pptr);
- mrg_stats_size_judyl_change(mrg, mem_before_judyl, mem_after_judyl, partition);
-
- if(unlikely(!rc)) {
- MRG_STATS_DELETE_MISS(mrg, partition);
- mrg_index_write_unlock(mrg, partition);
- return false;
- }
-
- if(!*sections_judy_pptr) {
- rc = JudyHSDel(&mrg->index[partition].uuid_judy, &metric->uuid, sizeof(uuid_t), PJE0);
- if(unlikely(!rc))
- fatal("DBENGINE METRIC: cannot delete UUID from JudyHS");
- mrg_stats_size_judyhs_removed_uuid(mrg, partition);
- }
-
- MRG_STATS_DELETED_METRIC(mrg, partition);
-
- mrg_index_write_unlock(mrg, partition);
-
- aral_freez(mrg->index[partition].aral, metric);
-
- return true;
-}
-
-// ----------------------------------------------------------------------------
-// public API
-
-inline MRG *mrg_create(ssize_t partitions) {
- if(partitions < 1)
- partitions = get_netdata_cpus();
-
- MRG *mrg = callocz(1, sizeof(MRG) + sizeof(struct mrg_partition) * partitions);
- mrg->partitions = partitions;
-
- for(size_t i = 0; i < mrg->partitions ; i++) {
- rw_spinlock_init(&mrg->index[i].rw_spinlock);
-
- char buf[ARAL_MAX_NAME + 1];
- snprintfz(buf, ARAL_MAX_NAME, "mrg[%zu]", i);
-
- mrg->index[i].aral = aral_create(buf, sizeof(METRIC), 0, 16384, &mrg_aral_statistics, NULL, NULL, false, false);
- }
-
- return mrg;
-}
-
-inline size_t mrg_aral_structures(void) {
- return aral_structures_from_stats(&mrg_aral_statistics);
-}
-
-inline size_t mrg_aral_overhead(void) {
- return aral_overhead_from_stats(&mrg_aral_statistics);
-}
-
-inline void mrg_destroy(MRG *mrg __maybe_unused) {
- // no destruction possible
- // we can't traverse the metrics list
-
- // to delete entries, the caller needs to keep pointers to them
- // and delete them one by one
-
- ;
-}
-
-inline METRIC *mrg_metric_add_and_acquire(MRG *mrg, MRG_ENTRY entry, bool *ret) {
-// internal_fatal(entry.latest_time_s > max_acceptable_collected_time(),
-// "DBENGINE METRIC: metric latest time is in the future");
-
- return metric_add_and_acquire(mrg, &entry, ret);
-}
-
-inline METRIC *mrg_metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t section) {
- return metric_get_and_acquire(mrg, uuid, section);
-}
-
-inline bool mrg_metric_release_and_delete(MRG *mrg, METRIC *metric) {
- return acquired_metric_del(mrg, metric);
-}
-
-inline METRIC *mrg_metric_dup(MRG *mrg, METRIC *metric) {
- metric_acquire(mrg, metric);
- return metric;
-}
-
-inline bool mrg_metric_release(MRG *mrg, METRIC *metric) {
- return metric_release_and_can_be_deleted(mrg, metric);
-}
-
-inline Word_t mrg_metric_id(MRG *mrg __maybe_unused, METRIC *metric) {
- return (Word_t)metric;
-}
-
-inline uuid_t *mrg_metric_uuid(MRG *mrg __maybe_unused, METRIC *metric) {
- return &metric->uuid;
-}
-
-inline Word_t mrg_metric_section(MRG *mrg __maybe_unused, METRIC *metric) {
- return metric->section;
-}
-
-inline bool mrg_metric_set_first_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t first_time_s) {
- internal_fatal(first_time_s < 0, "DBENGINE METRIC: timestamp is negative");
-
- if(unlikely(first_time_s < 0))
- return false;
-
- __atomic_store_n(&metric->first_time_s, first_time_s, __ATOMIC_RELAXED);
-
- return true;
-}
-
-inline void mrg_metric_expand_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t first_time_s, time_t last_time_s, time_t update_every_s) {
- internal_fatal(first_time_s < 0 || last_time_s < 0 || update_every_s < 0,
- "DBENGINE METRIC: timestamp is negative");
- internal_fatal(first_time_s > max_acceptable_collected_time(),
- "DBENGINE METRIC: metric first time is in the future");
- internal_fatal(last_time_s > max_acceptable_collected_time(),
- "DBENGINE METRIC: metric last time is in the future");
-
- if(first_time_s > 0)
- set_metric_field_with_condition(metric->first_time_s, first_time_s, _current <= 0 || _wanted < _current);
-
- if(last_time_s > 0) {
- if(set_metric_field_with_condition(metric->latest_time_s_clean, last_time_s, _current <= 0 || _wanted > _current) &&
- update_every_s > 0)
- // set the latest update every too
- set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, true);
- }
- else if(update_every_s > 0)
- // set it only if it is invalid
- set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, _current <= 0);
-}
-
-inline bool mrg_metric_set_first_time_s_if_bigger(MRG *mrg __maybe_unused, METRIC *metric, time_t first_time_s) {
- internal_fatal(first_time_s < 0, "DBENGINE METRIC: timestamp is negative");
- return set_metric_field_with_condition(metric->first_time_s, first_time_s, _wanted > _current);
-}
-
-inline time_t mrg_metric_get_first_time_s(MRG *mrg __maybe_unused, METRIC *metric) {
- return mrg_metric_get_first_time_s_smart(mrg, metric);
-}
-
-inline void mrg_metric_get_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t *first_time_s, time_t *last_time_s, time_t *update_every_s) {
- time_t clean = __atomic_load_n(&metric->latest_time_s_clean, __ATOMIC_RELAXED);
- time_t hot = __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED);
-
- *last_time_s = MAX(clean, hot);
- *first_time_s = mrg_metric_get_first_time_s_smart(mrg, metric);
- *update_every_s = __atomic_load_n(&metric->latest_update_every_s, __ATOMIC_RELAXED);
-}
-
-inline bool mrg_metric_set_clean_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t latest_time_s) {
- internal_fatal(latest_time_s < 0, "DBENGINE METRIC: timestamp is negative");
-
-// internal_fatal(latest_time_s > max_acceptable_collected_time(),
-// "DBENGINE METRIC: metric latest time is in the future");
-
-// internal_fatal(metric->latest_time_s_clean > latest_time_s,
-// "DBENGINE METRIC: metric new clean latest time is older than the previous one");
-
- if(latest_time_s > 0) {
- if(set_metric_field_with_condition(metric->latest_time_s_clean, latest_time_s, true)) {
- set_metric_field_with_condition(metric->first_time_s, latest_time_s, _current <= 0 || _wanted < _current);
-
- return true;
- }
- }
-
- return false;
-}
-
-// returns true when metric still has retention
-inline bool mrg_metric_zero_disk_retention(MRG *mrg __maybe_unused, METRIC *metric) {
- Word_t section = mrg_metric_section(mrg, metric);
- bool do_again = false;
- size_t countdown = 5;
-
- do {
- time_t min_first_time_s = LONG_MAX;
- time_t max_end_time_s = 0;
- PGC_PAGE *page;
- PGC_SEARCH method = PGC_SEARCH_FIRST;
- time_t page_first_time_s = 0;
- time_t page_end_time_s = 0;
- while ((page = pgc_page_get_and_acquire(main_cache, section, (Word_t)metric, page_first_time_s, method))) {
- method = PGC_SEARCH_NEXT;
-
- bool is_hot = pgc_is_page_hot(page);
- bool is_dirty = pgc_is_page_dirty(page);
- page_first_time_s = pgc_page_start_time_s(page);
- page_end_time_s = pgc_page_end_time_s(page);
-
- if ((is_hot || is_dirty) && page_first_time_s > 0 && page_first_time_s < min_first_time_s)
- min_first_time_s = page_first_time_s;
-
- if (is_dirty && page_end_time_s > max_end_time_s)
- max_end_time_s = page_end_time_s;
-
- pgc_page_release(main_cache, page);
- }
-
- if (min_first_time_s == LONG_MAX)
- min_first_time_s = 0;
-
- if (--countdown && !min_first_time_s && __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED))
- do_again = true;
- else {
- internal_error(!countdown, "METRIC: giving up on updating the retention of metric without disk retention");
-
- do_again = false;
- set_metric_field_with_condition(metric->first_time_s, min_first_time_s, true);
- set_metric_field_with_condition(metric->latest_time_s_clean, max_end_time_s, true);
- }
- } while(do_again);
-
- time_t first, last, ue;
- mrg_metric_get_retention(mrg, metric, &first, &last, &ue);
- return (first && last && first < last);
-}
-
-inline bool mrg_metric_set_hot_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t latest_time_s) {
- internal_fatal(latest_time_s < 0, "DBENGINE METRIC: timestamp is negative");
-
-// internal_fatal(latest_time_s > max_acceptable_collected_time(),
-// "DBENGINE METRIC: metric latest time is in the future");
-
- if(likely(latest_time_s > 0)) {
- __atomic_store_n(&metric->latest_time_s_hot, latest_time_s, __ATOMIC_RELAXED);
- return true;
- }
-
- return false;
-}
-
-inline time_t mrg_metric_get_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric) {
- time_t clean = __atomic_load_n(&metric->latest_time_s_clean, __ATOMIC_RELAXED);
- time_t hot = __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED);
-
- return MAX(clean, hot);
-}
-
-inline bool mrg_metric_set_update_every(MRG *mrg __maybe_unused, METRIC *metric, time_t update_every_s) {
- internal_fatal(update_every_s < 0, "DBENGINE METRIC: timestamp is negative");
-
- if(update_every_s > 0)
- return set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, true);
-
- return false;
-}
-
-inline bool mrg_metric_set_update_every_s_if_zero(MRG *mrg __maybe_unused, METRIC *metric, time_t update_every_s) {
- internal_fatal(update_every_s < 0, "DBENGINE METRIC: timestamp is negative");
-
- if(update_every_s > 0)
- return set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, _current <= 0);
-
- return false;
-}
-
-inline time_t mrg_metric_get_update_every_s(MRG *mrg __maybe_unused, METRIC *metric) {
- return __atomic_load_n(&metric->latest_update_every_s, __ATOMIC_RELAXED);
-}
-
-inline bool mrg_metric_set_writer(MRG *mrg, METRIC *metric) {
- pid_t expected = __atomic_load_n(&metric->writer, __ATOMIC_RELAXED);
- pid_t wanted = gettid();
- bool done = true;
-
- do {
- if(expected != 0) {
- done = false;
- break;
- }
- } while(!__atomic_compare_exchange_n(&metric->writer, &expected, wanted, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
-
- if(done)
- __atomic_add_fetch(&mrg->index[metric->partition].stats.writers, 1, __ATOMIC_RELAXED);
- else
- __atomic_add_fetch(&mrg->index[metric->partition].stats.writers_conflicts, 1, __ATOMIC_RELAXED);
-
- return done;
-}
-
-inline bool mrg_metric_clear_writer(MRG *mrg, METRIC *metric) {
- // this function can be called from a different thread than the one than the writer
-
- pid_t expected = __atomic_load_n(&metric->writer, __ATOMIC_RELAXED);
- pid_t wanted = 0;
- bool done = true;
-
- do {
- if(!expected) {
- done = false;
- break;
- }
- } while(!__atomic_compare_exchange_n(&metric->writer, &expected, wanted, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
-
- if(done)
- __atomic_sub_fetch(&mrg->index[metric->partition].stats.writers, 1, __ATOMIC_RELAXED);
-
- return done;
-}
-
-inline void mrg_update_metric_retention_and_granularity_by_uuid(
- MRG *mrg, Word_t section, uuid_t *uuid,
- time_t first_time_s, time_t last_time_s,
- time_t update_every_s, time_t now_s)
-{
- if(unlikely(last_time_s > now_s)) {
- nd_log_limit_static_global_var(erl, 1, 0);
- nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING,
- "DBENGINE JV2: wrong last time on-disk (%ld - %ld, now %ld), "
- "fixing last time to now",
- first_time_s, last_time_s, now_s);
- last_time_s = now_s;
- }
-
- if (unlikely(first_time_s > last_time_s)) {
- nd_log_limit_static_global_var(erl, 1, 0);
- nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING,
- "DBENGINE JV2: wrong first time on-disk (%ld - %ld, now %ld), "
- "fixing first time to last time",
- first_time_s, last_time_s, now_s);
-
- first_time_s = last_time_s;
- }
-
- if (unlikely(first_time_s == 0 || last_time_s == 0)) {
- nd_log_limit_static_global_var(erl, 1, 0);
- nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING,
- "DBENGINE JV2: zero on-disk timestamps (%ld - %ld, now %ld), "
- "using them as-is",
- first_time_s, last_time_s, now_s);
- }
-
- bool added = false;
- METRIC *metric = mrg_metric_get_and_acquire(mrg, uuid, section);
- if (!metric) {
- MRG_ENTRY entry = {
- .uuid = uuid,
- .section = section,
- .first_time_s = first_time_s,
- .last_time_s = last_time_s,
- .latest_update_every_s = (uint32_t) update_every_s
- };
- metric = mrg_metric_add_and_acquire(mrg, entry, &added);
- }
-
- if (likely(!added))
- mrg_metric_expand_retention(mrg, metric, first_time_s, last_time_s, update_every_s);
-
- mrg_metric_release(mrg, metric);
-}
-
-inline void mrg_get_statistics(MRG *mrg, struct mrg_statistics *s) {
- memset(s, 0, sizeof(struct mrg_statistics));
-
- for(size_t i = 0; i < mrg->partitions ;i++) {
- s->entries += __atomic_load_n(&mrg->index[i].stats.entries, __ATOMIC_RELAXED);
- s->entries_referenced += __atomic_load_n(&mrg->index[i].stats.entries_referenced, __ATOMIC_RELAXED);
- s->size += __atomic_load_n(&mrg->index[i].stats.size, __ATOMIC_RELAXED);
- s->current_references += __atomic_load_n(&mrg->index[i].stats.current_references, __ATOMIC_RELAXED);
- s->additions += __atomic_load_n(&mrg->index[i].stats.additions, __ATOMIC_RELAXED);
- s->additions_duplicate += __atomic_load_n(&mrg->index[i].stats.additions_duplicate, __ATOMIC_RELAXED);
- s->deletions += __atomic_load_n(&mrg->index[i].stats.deletions, __ATOMIC_RELAXED);
- s->delete_having_retention_or_referenced += __atomic_load_n(&mrg->index[i].stats.delete_having_retention_or_referenced, __ATOMIC_RELAXED);
- s->delete_misses += __atomic_load_n(&mrg->index[i].stats.delete_misses, __ATOMIC_RELAXED);
- s->search_hits += __atomic_load_n(&mrg->index[i].stats.search_hits, __ATOMIC_RELAXED);
- s->search_misses += __atomic_load_n(&mrg->index[i].stats.search_misses, __ATOMIC_RELAXED);
- s->writers += __atomic_load_n(&mrg->index[i].stats.writers, __ATOMIC_RELAXED);
- s->writers_conflicts += __atomic_load_n(&mrg->index[i].stats.writers_conflicts, __ATOMIC_RELAXED);
- }
-
- s->size += sizeof(MRG) + sizeof(struct mrg_partition) * mrg->partitions;
-}
-
-// ----------------------------------------------------------------------------
-// unit test
-
-struct mrg_stress_entry {
- uuid_t uuid;
- time_t after;
- time_t before;
-};
-
-struct mrg_stress {
- MRG *mrg;
- bool stop;
- size_t entries;
- struct mrg_stress_entry *array;
- size_t updates;
-};
-
-static void *mrg_stress(void *ptr) {
- struct mrg_stress *t = ptr;
- MRG *mrg = t->mrg;
-
- ssize_t start = 0;
- ssize_t end = (ssize_t)t->entries;
- ssize_t step = 1;
-
- if(gettid() % 2) {
- start = (ssize_t)t->entries - 1;
- end = -1;
- step = -1;
- }
-
- while(!__atomic_load_n(&t->stop, __ATOMIC_RELAXED)) {
- for (ssize_t i = start; i != end; i += step) {
- struct mrg_stress_entry *e = &t->array[i];
-
- time_t after = __atomic_sub_fetch(&e->after, 1, __ATOMIC_RELAXED);
- time_t before = __atomic_add_fetch(&e->before, 1, __ATOMIC_RELAXED);
-
- mrg_update_metric_retention_and_granularity_by_uuid(
- mrg, 0x01,
- &e->uuid,
- after,
- before,
- 1,
- before);
-
- __atomic_add_fetch(&t->updates, 1, __ATOMIC_RELAXED);
- }
- }
-
- return ptr;
-}
-
-int mrg_unittest(void) {
- MRG *mrg = mrg_create(0);
- METRIC *m1_t0, *m2_t0, *m3_t0, *m4_t0;
- METRIC *m1_t1, *m2_t1, *m3_t1, *m4_t1;
- bool ret;
-
- uuid_t test_uuid;
- uuid_generate(test_uuid);
- MRG_ENTRY entry = {
- .uuid = &test_uuid,
- .section = 0,
- .first_time_s = 2,
- .last_time_s = 3,
- .latest_update_every_s = 4,
- };
- m1_t0 = mrg_metric_add_and_acquire(mrg, entry, &ret);
- if(!ret)
- fatal("DBENGINE METRIC: failed to add metric");
-
- // add the same metric again
- m2_t0 = mrg_metric_add_and_acquire(mrg, entry, &ret);
- if(m2_t0 != m1_t0)
- fatal("DBENGINE METRIC: adding the same metric twice, does not return the same pointer");
- if(ret)
- fatal("DBENGINE METRIC: managed to add the same metric twice");
-
- m3_t0 = mrg_metric_get_and_acquire(mrg, entry.uuid, entry.section);
- if(m3_t0 != m1_t0)
- fatal("DBENGINE METRIC: cannot find the metric added");
-
- // add the same metric again
- m4_t0 = mrg_metric_add_and_acquire(mrg, entry, &ret);
- if(m4_t0 != m1_t0)
- fatal("DBENGINE METRIC: adding the same metric twice, does not return the same pointer");
- if(ret)
- fatal("DBENGINE METRIC: managed to add the same metric twice");
-
- // add the same metric in another section
- entry.section = 1;
- m1_t1 = mrg_metric_add_and_acquire(mrg, entry, &ret);
- if(!ret)
- fatal("DBENGINE METRIC: failed to add metric in section %zu", (size_t)entry.section);
-
- // add the same metric again
- m2_t1 = mrg_metric_add_and_acquire(mrg, entry, &ret);
- if(m2_t1 != m1_t1)
- fatal("DBENGINE METRIC: adding the same metric twice (section %zu), does not return the same pointer", (size_t)entry.section);
- if(ret)
- fatal("DBENGINE METRIC: managed to add the same metric twice in (section 0)");
-
- m3_t1 = mrg_metric_get_and_acquire(mrg, entry.uuid, entry.section);
- if(m3_t1 != m1_t1)
- fatal("DBENGINE METRIC: cannot find the metric added (section %zu)", (size_t)entry.section);
-
- // delete the first metric
- mrg_metric_release(mrg, m2_t0);
- mrg_metric_release(mrg, m3_t0);
- mrg_metric_release(mrg, m4_t0);
- mrg_metric_set_first_time_s(mrg, m1_t0, 0);
- mrg_metric_set_clean_latest_time_s(mrg, m1_t0, 0);
- mrg_metric_set_hot_latest_time_s(mrg, m1_t0, 0);
- if(!mrg_metric_release_and_delete(mrg, m1_t0))
- fatal("DBENGINE METRIC: cannot delete the first metric");
-
- m4_t1 = mrg_metric_get_and_acquire(mrg, entry.uuid, entry.section);
- if(m4_t1 != m1_t1)
- fatal("DBENGINE METRIC: cannot find the metric added (section %zu), after deleting the first one", (size_t)entry.section);
-
- // delete the second metric
- mrg_metric_release(mrg, m2_t1);
- mrg_metric_release(mrg, m3_t1);
- mrg_metric_release(mrg, m4_t1);
- mrg_metric_set_first_time_s(mrg, m1_t1, 0);
- mrg_metric_set_clean_latest_time_s(mrg, m1_t1, 0);
- mrg_metric_set_hot_latest_time_s(mrg, m1_t1, 0);
- if(!mrg_metric_release_and_delete(mrg, m1_t1))
- fatal("DBENGINE METRIC: cannot delete the second metric");
-
- struct mrg_statistics s;
- mrg_get_statistics(mrg, &s);
- if(s.entries != 0)
- fatal("DBENGINE METRIC: invalid entries counter");
-
- size_t entries = 1000000;
- size_t threads = mrg->partitions / 3 + 1;
- size_t tiers = 3;
- size_t run_for_secs = 5;
- netdata_log_info("preparing stress test of %zu entries...", entries);
- struct mrg_stress t = {
- .mrg = mrg,
- .entries = entries,
- .array = callocz(entries, sizeof(struct mrg_stress_entry)),
- };
-
- time_t now = max_acceptable_collected_time();
- for(size_t i = 0; i < entries ;i++) {
- uuid_generate_random(t.array[i].uuid);
- t.array[i].after = now / 3;
- t.array[i].before = now / 2;
- }
- netdata_log_info("stress test is populating MRG with 3 tiers...");
- for(size_t i = 0; i < entries ;i++) {
- struct mrg_stress_entry *e = &t.array[i];
- for(size_t tier = 1; tier <= tiers ;tier++) {
- mrg_update_metric_retention_and_granularity_by_uuid(
- mrg, tier,
- &e->uuid,
- e->after,
- e->before,
- 1,
- e->before);
- }
- }
- netdata_log_info("stress test ready to run...");
-
- usec_t started_ut = now_monotonic_usec();
-
- pthread_t th[threads];
- for(size_t i = 0; i < threads ; i++) {
- char buf[15 + 1];
- snprintfz(buf, sizeof(buf) - 1, "TH[%zu]", i);
- netdata_thread_create(&th[i], buf,
- NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
- mrg_stress, &t);
- }
-
- sleep_usec(run_for_secs * USEC_PER_SEC);
- __atomic_store_n(&t.stop, true, __ATOMIC_RELAXED);
-
- for(size_t i = 0; i < threads ; i++)
- netdata_thread_cancel(th[i]);
-
- for(size_t i = 0; i < threads ; i++)
- netdata_thread_join(th[i], NULL);
-
- usec_t ended_ut = now_monotonic_usec();
-
- struct mrg_statistics stats;
- mrg_get_statistics(mrg, &stats);
-
- netdata_log_info("DBENGINE METRIC: did %zu additions, %zu duplicate additions, "
- "%zu deletions, %zu wrong deletions, "
- "%zu successful searches, %zu wrong searches, "
- "in %"PRIu64" usecs",
- stats.additions, stats.additions_duplicate,
- stats.deletions, stats.delete_misses,
- stats.search_hits, stats.search_misses,
- ended_ut - started_ut);
-
- netdata_log_info("DBENGINE METRIC: updates performance: %0.2fk/sec total, %0.2fk/sec/thread",
- (double)t.updates / (double)((ended_ut - started_ut) / USEC_PER_SEC) / 1000.0,
- (double)t.updates / (double)((ended_ut - started_ut) / USEC_PER_SEC) / 1000.0 / threads);
-
- mrg_destroy(mrg);
-
- netdata_log_info("DBENGINE METRIC: all tests passed!");
-
- return 0;
-}
diff --git a/database/engine/pagecache.h b/database/engine/pagecache.h
deleted file mode 100644
index dbcbea53a..000000000
--- a/database/engine/pagecache.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-#ifndef NETDATA_PAGECACHE_H
-#define NETDATA_PAGECACHE_H
-
-#include "rrdengine.h"
-
-extern struct mrg *main_mrg;
-extern struct pgc *main_cache;
-extern struct pgc *open_cache;
-extern struct pgc *extent_cache;
-
-/* Forward declarations */
-struct rrdengine_instance;
-
-#define INVALID_TIME (0)
-#define MAX_PAGE_CACHE_FETCH_RETRIES (3)
-#define PAGE_CACHE_FETCH_WAIT_TIMEOUT (3)
-
-extern struct rrdeng_cache_efficiency_stats rrdeng_cache_efficiency_stats;
-
-struct page_descr_with_data {
- uuid_t *id;
- Word_t metric_id;
- usec_t start_time_ut;
- usec_t end_time_ut;
- uint8_t type;
- uint32_t update_every_s;
- uint32_t page_length;
- struct pgd *pgd;
-
- struct {
- struct page_descr_with_data *prev;
- struct page_descr_with_data *next;
- } link;
-};
-
-#define PAGE_INFO_SCRATCH_SZ (8)
-struct rrdeng_page_info {
- uint8_t scratch[PAGE_INFO_SCRATCH_SZ]; /* scratch area to be used by page-cache users */
-
- usec_t start_time_ut;
- usec_t end_time_ut;
- uint32_t page_length;
-};
-
-struct pg_alignment {
- uint32_t refcount;
-};
-
-struct rrdeng_query_handle;
-struct page_details_control;
-
-void rrdeng_prep_wait(struct page_details_control *pdc);
-void rrdeng_prep_query(struct page_details_control *pdc, bool worker);
-void pg_cache_preload(struct rrdeng_query_handle *handle);
-struct pgc_page *pg_cache_lookup_next(struct rrdengine_instance *ctx, struct page_details_control *pdc, time_t now_s, time_t last_update_every_s, size_t *entries);
-void pgc_and_mrg_initialize(void);
-
-void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s, time_t end_time_s, time_t update_every_s, struct rrdengine_datafile *datafile, uint64_t extent_offset, unsigned extent_size, uint32_t page_length);
-
-#endif /* NETDATA_PAGECACHE_H */
diff --git a/database/engine/rrdenginelib.c b/database/engine/rrdenginelib.c
deleted file mode 100644
index dc581d98d..000000000
--- a/database/engine/rrdenginelib.c
+++ /dev/null
@@ -1,161 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-#include "rrdengine.h"
-
-int check_file_properties(uv_file file, uint64_t *file_size, size_t min_size)
-{
- int ret;
- uv_fs_t req;
- uv_stat_t* s;
-
- ret = uv_fs_fstat(NULL, &req, file, NULL);
- if (ret < 0) {
- fatal("uv_fs_fstat: %s\n", uv_strerror(ret));
- }
- fatal_assert(req.result == 0);
- s = req.ptr;
- if (!(s->st_mode & S_IFREG)) {
- netdata_log_error("Not a regular file.\n");
- uv_fs_req_cleanup(&req);
- return UV_EINVAL;
- }
- if (s->st_size < min_size) {
- netdata_log_error("File length is too short.\n");
- uv_fs_req_cleanup(&req);
- return UV_EINVAL;
- }
- *file_size = s->st_size;
- uv_fs_req_cleanup(&req);
-
- return 0;
-}
-
-/**
- * Open file for I/O.
- *
- * @param path The full path of the file.
- * @param flags Same flags as the open() system call uses.
- * @param file On success sets (*file) to be the uv_file that was opened.
- * @param direct Tries to open a file in direct I/O mode when direct=1, falls back to buffered mode if not possible.
- * @return Returns UV error number that is < 0 on failure. 0 on success.
- */
-int open_file_for_io(char *path, int flags, uv_file *file, int direct)
-{
- uv_fs_t req;
- int fd = -1, current_flags;
-
- fatal_assert(0 == direct || 1 == direct);
- for ( ; direct >= 0 ; --direct) {
-#ifdef __APPLE__
- /* Apple OS does not support O_DIRECT */
- direct = 0;
-#endif
- current_flags = flags;
- if (direct) {
- current_flags |= O_DIRECT;
- }
- fd = uv_fs_open(NULL, &req, path, current_flags, S_IRUSR | S_IWUSR, NULL);
- if (fd < 0) {
- if ((direct) && (UV_EINVAL == fd)) {
- netdata_log_error("File \"%s\" does not support direct I/O, falling back to buffered I/O.", path);
- } else {
- netdata_log_error("Failed to open file \"%s\".", path);
- --direct; /* break the loop */
- }
- } else {
- fatal_assert(req.result >= 0);
- *file = req.result;
-#ifdef __APPLE__
- netdata_log_info("Disabling OS X caching for file \"%s\".", path);
- fcntl(fd, F_NOCACHE, 1);
-#endif
- --direct; /* break the loop */
- }
- uv_fs_req_cleanup(&req);
- }
-
- return fd;
-}
-
-int is_legacy_child(const char *machine_guid)
-{
- uuid_t uuid;
- char dbengine_file[FILENAME_MAX+1];
-
- if (unlikely(!strcmp(machine_guid, "unittest-dbengine") || !strcmp(machine_guid, "dbengine-dataset") ||
- !strcmp(machine_guid, "dbengine-stress-test"))) {
- return 1;
- }
- if (!uuid_parse(machine_guid, uuid)) {
- uv_fs_t stat_req;
- snprintfz(dbengine_file, FILENAME_MAX, "%s/%s/dbengine", netdata_configured_cache_dir, machine_guid);
- int rc = uv_fs_stat(NULL, &stat_req, dbengine_file, NULL);
- if (likely(rc == 0 && ((stat_req.statbuf.st_mode & S_IFMT) == S_IFDIR))) {
- //netdata_log_info("Found legacy engine folder \"%s\"", dbengine_file);
- return 1;
- }
- }
- return 0;
-}
-
-int count_legacy_children(char *dbfiles_path)
-{
- int ret;
- uv_fs_t req;
- uv_dirent_t dent;
- int legacy_engines = 0;
-
- ret = uv_fs_scandir(NULL, &req, dbfiles_path, 0, NULL);
- if (ret < 0) {
- uv_fs_req_cleanup(&req);
- netdata_log_error("uv_fs_scandir(%s): %s", dbfiles_path, uv_strerror(ret));
- return ret;
- }
-
- while(UV_EOF != uv_fs_scandir_next(&req, &dent)) {
- if (dent.type == UV_DIRENT_DIR) {
- if (is_legacy_child(dent.name))
- legacy_engines++;
- }
- }
- uv_fs_req_cleanup(&req);
- return legacy_engines;
-}
-
-int compute_multidb_diskspace()
-{
- char multidb_disk_space_file[FILENAME_MAX + 1];
- FILE *fp;
- int computed_multidb_disk_quota_mb = -1;
-
- snprintfz(multidb_disk_space_file, FILENAME_MAX, "%s/dbengine_multihost_size", netdata_configured_varlib_dir);
- fp = fopen(multidb_disk_space_file, "r");
- if (likely(fp)) {
- int rc = fscanf(fp, "%d", &computed_multidb_disk_quota_mb);
- fclose(fp);
- if (unlikely(rc != 1 || computed_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB)) {
- errno = 0;
- netdata_log_error("File '%s' contains invalid input, it will be rebuild", multidb_disk_space_file);
- computed_multidb_disk_quota_mb = -1;
- }
- }
-
- if (computed_multidb_disk_quota_mb == -1) {
- int rc = count_legacy_children(netdata_configured_cache_dir);
- if (likely(rc >= 0)) {
- computed_multidb_disk_quota_mb = (rc + 1) * default_rrdeng_disk_quota_mb;
- netdata_log_info("Found %d legacy dbengines, setting multidb diskspace to %dMB", rc, computed_multidb_disk_quota_mb);
-
- fp = fopen(multidb_disk_space_file, "w");
- if (likely(fp)) {
- fprintf(fp, "%d", computed_multidb_disk_quota_mb);
- netdata_log_info("Created file '%s' to store the computed value", multidb_disk_space_file);
- fclose(fp);
- } else
- netdata_log_error("Failed to store the default multidb disk quota size on '%s'", multidb_disk_space_file);
- }
- else
- computed_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb;
- }
-
- return computed_multidb_disk_quota_mb;
-}
diff --git a/database/engine/datafile.h b/src/database/engine/datafile.h
index 569f1b0a2..569f1b0a2 100644
--- a/database/engine/datafile.h
+++ b/src/database/engine/datafile.h
diff --git a/database/engine/dbengine-diagram.xml b/src/database/engine/dbengine-diagram.xml
index 793e8a355..793e8a355 100644
--- a/database/engine/dbengine-diagram.xml
+++ b/src/database/engine/dbengine-diagram.xml
diff --git a/database/engine/journalfile.c b/src/database/engine/journalfile.c
index 9005b81ca..4ea988d64 100644
--- a/database/engine/journalfile.c
+++ b/src/database/engine/journalfile.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-3.0-or-later
+#include "libnetdata/bitmap64.h"
#include "rrdengine.h"
static void after_extent_write_journalfile_v1_io(uv_fs_t* req)
@@ -572,7 +573,7 @@ int journalfile_create(struct rrdengine_journalfile *journalfile, struct rrdengi
uv_fs_t req;
uv_file file;
int ret, fd;
- struct rrdeng_jf_sb *superblock;
+ struct rrdeng_jf_sb *superblock = NULL;
uv_buf_t iov;
char path[RRDENG_PATH_MAX];
@@ -618,7 +619,7 @@ int journalfile_create(struct rrdengine_journalfile *journalfile, struct rrdengi
static int journalfile_check_superblock(uv_file file)
{
int ret;
- struct rrdeng_jf_sb *superblock;
+ struct rrdeng_jf_sb *superblock = NULL;
uv_buf_t iov;
uv_fs_t req;
@@ -637,9 +638,12 @@ static int journalfile_check_superblock(uv_file file)
fatal_assert(req.result >= 0);
uv_fs_req_cleanup(&req);
- if (strncmp(superblock->magic_number, RRDENG_JF_MAGIC, RRDENG_MAGIC_SZ) ||
- strncmp(superblock->version, RRDENG_JF_VER, RRDENG_VER_SZ)) {
- netdata_log_error("DBENGINE: File has invalid superblock.");
+
+ char jf_magic[RRDENG_MAGIC_SZ] = RRDENG_JF_MAGIC;
+ char jf_ver[RRDENG_VER_SZ] = RRDENG_JF_VER;
+ if (strncmp(superblock->magic_number, jf_magic, RRDENG_MAGIC_SZ) != 0 ||
+ strncmp(superblock->version, jf_ver, RRDENG_VER_SZ) != 0) {
+ nd_log(NDLS_DAEMON, NDLP_ERR, "DBENGINE: File has invalid superblock.");
ret = UV_EINVAL;
} else {
ret = 0;
@@ -651,7 +655,7 @@ static int journalfile_check_superblock(uv_file file)
static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile, void *buf, unsigned max_size)
{
- static BITMAP256 page_error_map = BITMAP256_INITIALIZER;
+ static bitmap64_t page_error_map = BITMAP64_INITIALIZER;
unsigned i, count, payload_length, descr_size;
struct rrdeng_jf_store_data *jf_metric_data;
@@ -665,19 +669,20 @@ static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx,
}
time_t now_s = max_acceptable_collected_time();
+ time_t extent_first_time_s = journalfile->v2.first_time_s ? journalfile->v2.first_time_s : LONG_MAX;
for (i = 0; i < count ; ++i) {
- uuid_t *temp_id;
+ nd_uuid_t *temp_id;
uint8_t page_type = jf_metric_data->descr[i].type;
- if (page_type > PAGE_TYPE_MAX) {
- if (!bitmap256_get_bit(&page_error_map, page_type)) {
+ if (page_type > RRDENG_PAGE_TYPE_MAX) {
+ if (!bitmap64_get(&page_error_map, page_type)) {
netdata_log_error("DBENGINE: unknown page type %d encountered.", page_type);
- bitmap256_set_bit(&page_error_map, page_type, 1);
+ bitmap64_set(&page_error_map, page_type);
}
continue;
}
- temp_id = (uuid_t *)jf_metric_data->descr[i].uuid;
+ temp_id = (nd_uuid_t *)jf_metric_data->descr[i].uuid;
METRIC *metric = mrg_metric_get_and_acquire(main_mrg, temp_id, (Word_t) ctx);
struct rrdeng_extent_page_descr *descr = &jf_metric_data->descr[i];
@@ -700,13 +705,19 @@ static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx,
.section = (Word_t)ctx,
.first_time_s = vd.start_time_s,
.last_time_s = vd.end_time_s,
- .latest_update_every_s = (uint32_t) vd.update_every_s,
+ .latest_update_every_s = vd.update_every_s,
};
bool added;
metric = mrg_metric_add_and_acquire(main_mrg, entry, &added);
- if(added)
+ if(added) {
+ __atomic_add_fetch(&ctx->atomic.metrics, 1, __ATOMIC_RELAXED);
update_metric_time = false;
+ }
+ if (vd.update_every_s) {
+ uint64_t samples = (vd.end_time_s - vd.start_time_s) / vd.update_every_s;
+ __atomic_add_fetch(&ctx->atomic.samples, samples, __ATOMIC_RELAXED);
+ }
}
Word_t metric_id = mrg_metric_id(main_mrg, metric);
@@ -718,8 +729,18 @@ static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx,
journalfile->datafile,
jf_metric_data->extent_offset, jf_metric_data->extent_size, jf_metric_data->descr[i].page_length);
+ extent_first_time_s = MIN(extent_first_time_s, vd.start_time_s);
+
mrg_metric_release(main_mrg, metric);
}
+
+ journalfile->v2.first_time_s = extent_first_time_s;
+
+ time_t old = __atomic_load_n(&ctx->atomic.first_time_s, __ATOMIC_RELAXED);;
+ do {
+ if(old <= extent_first_time_s)
+ break;
+ } while(!__atomic_compare_exchange_n(&ctx->atomic.first_time_s, &old, extent_first_time_s, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}
/*
@@ -790,7 +811,7 @@ static uint64_t journalfile_iterate_transactions(struct rrdengine_instance *ctx,
int ret;
uint64_t pos, pos_i, max_id, id;
unsigned size_bytes;
- void *buf;
+ void *buf = NULL;
uv_buf_t iov;
uv_fs_t req;
@@ -1005,7 +1026,7 @@ void journalfile_v2_populate_retention_to_mrg(struct rrdengine_instance *ctx, st
time_t end_time_s = header_start_time_s + metric->delta_end_s;
mrg_update_metric_retention_and_granularity_by_uuid(
- main_mrg, (Word_t)ctx, &metric->uuid, start_time_s, end_time_s, (time_t) metric->update_every_s, now_s);
+ main_mrg, (Word_t)ctx, &metric->uuid, start_time_s, end_time_s, metric->update_every_s, now_s);
metric++;
}
@@ -1042,7 +1063,7 @@ int journalfile_v2_load(struct rrdengine_instance *ctx, struct rrdengine_journal
journal_v1_file_size = (uint32_t)statbuf.st_size;
journalfile_v2_generate_path(datafile, path_v2, sizeof(path_v2));
- fd = open(path_v2, O_RDONLY);
+ fd = open(path_v2, O_RDONLY | O_CLOEXEC);
if (fd < 0) {
if (errno == ENOENT)
return 1;
@@ -1135,7 +1156,7 @@ static int journalfile_metric_compare (const void *item1, const void *item2)
const struct jv2_metrics_info *metric1 = ((struct journal_metric_list_to_sort *) item1)->metric_info;
const struct jv2_metrics_info *metric2 = ((struct journal_metric_list_to_sort *) item2)->metric_info;
- return memcmp(metric1->uuid, metric2->uuid, sizeof(uuid_t));
+ return memcmp(metric1->uuid, metric2->uuid, sizeof(nd_uuid_t));
}
@@ -1226,7 +1247,7 @@ void *journalfile_v2_write_data_page(struct journal_v2_header *j2_header, void *
data_page->delta_end_s = (uint32_t) (page_info->end_time_s - (time_t) (j2_header->start_time_ut) / USEC_PER_SEC);
data_page->extent_index = page_info->extent_index;
- data_page->update_every_s = (uint32_t) page_info->update_every_s;
+ data_page->update_every_s = page_info->update_every_s;
data_page->page_length = (uint16_t) (ei ? ei->page_length : page_info->page_length);
data_page->type = 0;
@@ -1252,7 +1273,7 @@ static void *journalfile_v2_write_descriptors(struct journal_v2_header *j2_heade
page_info = *PValue;
// Write one descriptor and return the next data page location
data_page = journalfile_v2_write_data_page(j2_header, (void *) data_page, page_info);
- update_every_s = (uint32_t) page_info->update_every_s;
+ update_every_s = page_info->update_every_s;
if (NULL == data_page)
break;
}
diff --git a/database/engine/journalfile.h b/src/database/engine/journalfile.h
index 5cdf72b9d..5bb38b00a 100644
--- a/database/engine/journalfile.h
+++ b/src/database/engine/journalfile.h
@@ -7,7 +7,6 @@
/* Forward declarations */
struct rrdengine_instance;
-struct rrdengine_worker_config;
struct rrdengine_datafile;
struct rrdengine_journalfile;
@@ -85,7 +84,7 @@ struct journal_page_header {
};
uint32_t uuid_offset; // Points back to the UUID list which should point here (UUIDs should much)
uint32_t entries; // Entries
- uuid_t uuid; // Which UUID this is
+ nd_uuid_t uuid; // Which UUID this is
};
// 20 bytes
@@ -101,7 +100,7 @@ struct journal_page_list {
// UUID_LIST
// 36 bytes
struct journal_metric_list {
- uuid_t uuid;
+ nd_uuid_t uuid;
uint32_t entries; // Number of entries
uint32_t page_offset; // OFFSET that contains entries * struct( journal_page_list )
uint32_t delta_start_s; // Min time of metric
diff --git a/database/engine/metric.h b/src/database/engine/metric.h
index dbb949301..038a90e02 100644
--- a/database/engine/metric.h
+++ b/src/database/engine/metric.h
@@ -10,7 +10,7 @@ typedef struct metric METRIC;
typedef struct mrg MRG;
typedef struct mrg_entry {
- uuid_t *uuid;
+ nd_uuid_t *uuid;
Word_t section;
time_t first_time_s;
time_t last_time_s;
@@ -52,14 +52,14 @@ MRG *mrg_create(ssize_t partitions);
void mrg_destroy(MRG *mrg);
METRIC *mrg_metric_dup(MRG *mrg, METRIC *metric);
-bool mrg_metric_release(MRG *mrg, METRIC *metric);
+void mrg_metric_release(MRG *mrg, METRIC *metric);
METRIC *mrg_metric_add_and_acquire(MRG *mrg, MRG_ENTRY entry, bool *ret);
-METRIC *mrg_metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t section);
+METRIC *mrg_metric_get_and_acquire(MRG *mrg, nd_uuid_t *uuid, Word_t section);
bool mrg_metric_release_and_delete(MRG *mrg, METRIC *metric);
Word_t mrg_metric_id(MRG *mrg, METRIC *metric);
-uuid_t *mrg_metric_uuid(MRG *mrg, METRIC *metric);
+nd_uuid_t *mrg_metric_uuid(MRG *mrg, METRIC *metric);
Word_t mrg_metric_section(MRG *mrg, METRIC *metric);
bool mrg_metric_set_first_time_s(MRG *mrg, METRIC *metric, time_t first_time_s);
@@ -69,13 +69,14 @@ time_t mrg_metric_get_first_time_s(MRG *mrg, METRIC *metric);
bool mrg_metric_set_clean_latest_time_s(MRG *mrg, METRIC *metric, time_t latest_time_s);
bool mrg_metric_set_hot_latest_time_s(MRG *mrg, METRIC *metric, time_t latest_time_s);
time_t mrg_metric_get_latest_time_s(MRG *mrg, METRIC *metric);
+time_t mrg_metric_get_latest_clean_time_s(MRG *mrg, METRIC *metric);
-bool mrg_metric_set_update_every(MRG *mrg, METRIC *metric, time_t update_every_s);
-bool mrg_metric_set_update_every_s_if_zero(MRG *mrg, METRIC *metric, time_t update_every_s);
-time_t mrg_metric_get_update_every_s(MRG *mrg, METRIC *metric);
+bool mrg_metric_set_update_every(MRG *mrg, METRIC *metric, uint32_t update_every_s);
+bool mrg_metric_set_update_every_s_if_zero(MRG *mrg, METRIC *metric, uint32_t update_every_s);
+uint32_t mrg_metric_get_update_every_s(MRG *mrg, METRIC *metric);
-void mrg_metric_expand_retention(MRG *mrg, METRIC *metric, time_t first_time_s, time_t last_time_s, time_t update_every_s);
-void mrg_metric_get_retention(MRG *mrg, METRIC *metric, time_t *first_time_s, time_t *last_time_s, time_t *update_every_s);
+void mrg_metric_expand_retention(MRG *mrg, METRIC *metric, time_t first_time_s, time_t last_time_s, uint32_t update_every_s);
+void mrg_metric_get_retention(MRG *mrg, METRIC *metric, time_t *first_time_s, time_t *last_time_s, uint32_t *update_every_s);
bool mrg_metric_zero_disk_retention(MRG *mrg __maybe_unused, METRIC *metric);
bool mrg_metric_set_writer(MRG *mrg, METRIC *metric);
@@ -87,8 +88,8 @@ size_t mrg_aral_overhead(void);
void mrg_update_metric_retention_and_granularity_by_uuid(
- MRG *mrg, Word_t section, uuid_t *uuid,
+ MRG *mrg, Word_t section, nd_uuid_t *uuid,
time_t first_time_s, time_t last_time_s,
- time_t update_every_s, time_t now_s);
+ uint32_t update_every_s, time_t now_s);
#endif // DBENGINE_METRIC_H
diff --git a/database/engine/page.c b/src/database/engine/page.c
index b7a393483..5c4ac14e7 100644
--- a/database/engine/page.c
+++ b/src/database/engine/page.c
@@ -111,9 +111,9 @@ void pgd_init_arals(void)
// FIXME: add stats
pgd_alloc_globals.aral_gorilla_buffer[i] = aral_create(
buf,
- GORILLA_BUFFER_SIZE,
+ RRDENG_GORILLA_32BIT_BUFFER_SIZE,
64,
- 512 * GORILLA_BUFFER_SIZE,
+ 512 * RRDENG_GORILLA_32BIT_BUFFER_SIZE,
pgc_aral_statistics(),
NULL, NULL, false, false);
}
@@ -165,8 +165,8 @@ PGD *pgd_create(uint8_t type, uint32_t slots)
pg->states = PGD_STATE_CREATED_FROM_COLLECTOR;
switch (type) {
- case PAGE_METRICS:
- case PAGE_TIER: {
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1: {
uint32_t size = slots * page_type_size[type];
internal_fatal(!size || slots == 1,
@@ -176,28 +176,31 @@ PGD *pgd_create(uint8_t type, uint32_t slots)
pg->raw.data = pgd_data_aral_alloc(size);
break;
}
- case PAGE_GORILLA_METRICS: {
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT: {
internal_fatal(slots == 1,
"DBENGINE: invalid number of slots (%u) or page type (%u)", slots, type);
- pg->slots = 8 * GORILLA_BUFFER_SLOTS;
+ pg->slots = 8 * RRDENG_GORILLA_32BIT_BUFFER_SLOTS;
// allocate new gorilla writer
- pg->gorilla.aral_index = gettid() % 4;
+ pg->gorilla.aral_index = gettid_cached() % 4;
pg->gorilla.writer = aral_mallocz(pgd_alloc_globals.aral_gorilla_writer[pg->gorilla.aral_index]);
// allocate new gorilla buffer
gorilla_buffer_t *gbuf = aral_mallocz(pgd_alloc_globals.aral_gorilla_buffer[pg->gorilla.aral_index]);
- memset(gbuf, 0, GORILLA_BUFFER_SIZE);
+ memset(gbuf, 0, RRDENG_GORILLA_32BIT_BUFFER_SIZE);
global_statistics_gorilla_buffer_add_hot();
- *pg->gorilla.writer = gorilla_writer_init(gbuf, GORILLA_BUFFER_SLOTS);
+ *pg->gorilla.writer = gorilla_writer_init(gbuf, RRDENG_GORILLA_32BIT_BUFFER_SLOTS);
pg->gorilla.num_buffers = 1;
break;
}
default:
- fatal("Unknown page type: %uc", type);
+ netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, type);
+ aral_freez(pgd_alloc_globals.aral_pgd, pg);
+ pg = PGD_EMPTY;
+ break;
}
return pg;
@@ -219,8 +222,8 @@ PGD *pgd_create_from_disk_data(uint8_t type, void *base, uint32_t size)
switch (type)
{
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
pg->raw.size = size;
pg->used = size / page_type_size[type];
pg->slots = pg->used;
@@ -228,10 +231,11 @@ PGD *pgd_create_from_disk_data(uint8_t type, void *base, uint32_t size)
pg->raw.data = pgd_data_aral_alloc(size);
memcpy(pg->raw.data, base, size);
break;
- case PAGE_GORILLA_METRICS:
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT:
internal_fatal(size == 0, "Asked to create page with 0 data!!!");
internal_fatal(size % sizeof(uint32_t), "Unaligned gorilla buffer size");
- internal_fatal(size % GORILLA_BUFFER_SIZE, "Expected size to be a multiple of %zu-bytes", GORILLA_BUFFER_SIZE);
+ internal_fatal(size % RRDENG_GORILLA_32BIT_BUFFER_SIZE, "Expected size to be a multiple of %zu-bytes",
+ RRDENG_GORILLA_32BIT_BUFFER_SIZE);
pg->raw.data = mallocz(size);
pg->raw.size = size;
@@ -246,7 +250,10 @@ PGD *pgd_create_from_disk_data(uint8_t type, void *base, uint32_t size)
pg->slots = pg->used;
break;
default:
- fatal("Unknown page type: %uc", type);
+ netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, type);
+ aral_freez(pgd_alloc_globals.aral_pgd, pg);
+ pg = PGD_EMPTY;
+ break;
}
return pg;
@@ -262,11 +269,11 @@ void pgd_free(PGD *pg)
switch (pg->type)
{
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
pgd_data_aral_free(pg->raw.data, pg->raw.size);
break;
- case PAGE_GORILLA_METRICS: {
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT: {
if (pg->states & PGD_STATE_CREATED_FROM_DISK)
{
internal_fatal(pg->raw.data == NULL, "Tried to free gorilla PGD loaded from disk with NULL data");
@@ -306,7 +313,8 @@ void pgd_free(PGD *pg)
break;
}
default:
- fatal("Unknown page type: %uc", pg->type);
+ netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type);
+ break;
}
aral_freez(pgd_alloc_globals.aral_pgd, pg);
@@ -358,20 +366,21 @@ uint32_t pgd_memory_footprint(PGD *pg)
size_t footprint = 0;
switch (pg->type) {
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
footprint = sizeof(PGD) + pg->raw.size;
break;
- case PAGE_GORILLA_METRICS: {
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT: {
if (pg->states & PGD_STATE_CREATED_FROM_DISK)
footprint = sizeof(PGD) + pg->raw.size;
else
- footprint = sizeof(PGD) + sizeof(gorilla_writer_t) + (pg->gorilla.num_buffers * GORILLA_BUFFER_SIZE);
+ footprint = sizeof(PGD) + sizeof(gorilla_writer_t) + (pg->gorilla.num_buffers * RRDENG_GORILLA_32BIT_BUFFER_SIZE);
break;
}
default:
- fatal("Unknown page type: %uc", pg->type);
+ netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type);
+ break;
}
return footprint;
@@ -385,15 +394,15 @@ uint32_t pgd_disk_footprint(PGD *pg)
size_t size = 0;
switch (pg->type) {
- case PAGE_METRICS:
- case PAGE_TIER: {
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1: {
uint32_t used_size = pg->used * page_type_size[pg->type];
internal_fatal(used_size > pg->raw.size, "Wrong disk footprint page size");
size = used_size;
break;
}
- case PAGE_GORILLA_METRICS: {
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT: {
if (pg->states & PGD_STATE_CREATED_FROM_COLLECTOR ||
pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING ||
pg->states & PGD_STATE_FLUSHED_TO_DISK)
@@ -404,7 +413,7 @@ uint32_t pgd_disk_footprint(PGD *pg)
internal_fatal(pg->gorilla.num_buffers == 0,
"Gorilla writer does not have any buffers");
- size = pg->gorilla.num_buffers * GORILLA_BUFFER_SIZE;
+ size = pg->gorilla.num_buffers * RRDENG_GORILLA_32BIT_BUFFER_SIZE;
if (pg->states & PGD_STATE_CREATED_FROM_COLLECTOR) {
global_statistics_tier0_disk_compressed_bytes(gorilla_writer_nbytes(pg->gorilla.writer));
@@ -419,7 +428,8 @@ uint32_t pgd_disk_footprint(PGD *pg)
break;
}
default:
- fatal("Unknown page type: %uc", pg->type);
+ netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type);
+ break;
}
internal_fatal(pg->states & PGD_STATE_CREATED_FROM_DISK,
@@ -434,11 +444,11 @@ void pgd_copy_to_extent(PGD *pg, uint8_t *dst, uint32_t dst_size)
pgd_disk_footprint(pg), dst_size);
switch (pg->type) {
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
memcpy(dst, pg->raw.data, dst_size);
break;
- case PAGE_GORILLA_METRICS: {
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT: {
if ((pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING) == 0)
fatal("Copying to extent is supported only for PGDs that are scheduled for flushing.");
@@ -456,7 +466,8 @@ void pgd_copy_to_extent(PGD *pg, uint8_t *dst, uint32_t dst_size)
break;
}
default:
- fatal("Unknown page type: %uc", pg->type);
+ netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type);
+ break;
}
pg->states = PGD_STATE_FLUSHED_TO_DISK;
@@ -490,7 +501,7 @@ void pgd_append_point(PGD *pg,
fatal("Data collection on page already scheduled for flushing");
switch (pg->type) {
- case PAGE_METRICS: {
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT: {
storage_number *tier0_metric_data = (storage_number *)pg->raw.data;
storage_number t = pack_storage_number(n, flags);
tier0_metric_data[pg->used++] = t;
@@ -500,7 +511,7 @@ void pgd_append_point(PGD *pg,
break;
}
- case PAGE_TIER: {
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1: {
storage_number_tier1_t *tier12_metric_data = (storage_number_tier1_t *)pg->raw.data;
storage_number_tier1_t t;
t.sum_value = (float) n;
@@ -515,7 +526,7 @@ void pgd_append_point(PGD *pg,
break;
}
- case PAGE_GORILLA_METRICS: {
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT: {
pg->used++;
storage_number t = pack_storage_number(n, flags);
@@ -525,9 +536,9 @@ void pgd_append_point(PGD *pg,
bool ok = gorilla_writer_write(pg->gorilla.writer, t);
if (!ok) {
gorilla_buffer_t *new_buffer = aral_mallocz(pgd_alloc_globals.aral_gorilla_buffer[pg->gorilla.aral_index]);
- memset(new_buffer, 0, GORILLA_BUFFER_SIZE);
+ memset(new_buffer, 0, RRDENG_GORILLA_32BIT_BUFFER_SIZE);
- gorilla_writer_add_buffer(pg->gorilla.writer, new_buffer, GORILLA_BUFFER_SLOTS);
+ gorilla_writer_add_buffer(pg->gorilla.writer, new_buffer, RRDENG_GORILLA_32BIT_BUFFER_SLOTS);
pg->gorilla.num_buffers += 1;
global_statistics_gorilla_buffer_add_hot();
@@ -537,7 +548,7 @@ void pgd_append_point(PGD *pg,
break;
}
default:
- fatal("DBENGINE: unknown page type id %d", pg->type);
+ netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type);
break;
}
}
@@ -550,11 +561,11 @@ static void pgdc_seek(PGDC *pgdc, uint32_t position)
PGD *pg = pgdc->pgd;
switch (pg->type) {
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
pgdc->slots = pgdc->pgd->used;
break;
- case PAGE_GORILLA_METRICS: {
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT: {
if (pg->states & PGD_STATE_CREATED_FROM_DISK) {
pgdc->slots = pgdc->pgd->slots;
pgdc->gr = gorilla_reader_init((void *) pg->raw.data);
@@ -588,7 +599,7 @@ static void pgdc_seek(PGDC *pgdc, uint32_t position)
break;
}
default:
- fatal("DBENGINE: unknown page type id %d", pg->type);
+ netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type);
break;
}
}
@@ -612,7 +623,7 @@ void pgdc_reset(PGDC *pgdc, PGD *pgd, uint32_t position)
pgdc_seek(pgdc, position);
}
-bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT *sp)
+bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position __maybe_unused, STORAGE_POINT *sp)
{
if (!pgdc->pgd || pgdc->pgd == PGD_EMPTY || pgdc->position >= pgdc->slots)
{
@@ -624,7 +635,7 @@ bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT *
switch (pgdc->pgd->type)
{
- case PAGE_METRICS: {
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT: {
storage_number *array = (storage_number *) pgdc->pgd->raw.data;
storage_number n = array[pgdc->position++];
@@ -635,7 +646,7 @@ bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT *
return true;
}
- case PAGE_TIER: {
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1: {
storage_number_tier1_t *array = (storage_number_tier1_t *) pgdc->pgd->raw.data;
storage_number_tier1_t n = array[pgdc->position++];
@@ -648,7 +659,7 @@ bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT *
return true;
}
- case PAGE_GORILLA_METRICS: {
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT: {
pgdc->position++;
uint32_t n = 666666666;
@@ -668,7 +679,8 @@ bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT *
static bool logged = false;
if (!logged)
{
- netdata_log_error("DBENGINE: unknown page type %d found. Cannot decode it. Ignoring its metrics.", pgd_type(pgdc->pgd));
+ netdata_log_error("DBENGINE: unknown page type %"PRIu32" found. Cannot decode it. Ignoring its metrics.",
+ pgd_type(pgdc->pgd));
logged = true;
}
diff --git a/database/engine/page.h b/src/database/engine/page.h
index 32c87c580..32c87c580 100644
--- a/database/engine/page.h
+++ b/src/database/engine/page.h
diff --git a/database/engine/page_test.cc b/src/database/engine/page_test.cc
index d61299bc4..d61299bc4 100644
--- a/database/engine/page_test.cc
+++ b/src/database/engine/page_test.cc
diff --git a/database/engine/page_test.h b/src/database/engine/page_test.h
index 30837f0ab..30837f0ab 100644
--- a/database/engine/page_test.h
+++ b/src/database/engine/page_test.h
diff --git a/database/engine/pagecache.c b/src/database/engine/pagecache.c
index dab9cdd0d..a88992223 100644
--- a/database/engine/pagecache.c
+++ b/src/database/engine/pagecache.c
@@ -222,7 +222,7 @@ static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengin
Word_t metric_id = mrg_metric_id(main_mrg, metric);
time_t now_s = wanted_start_time_s;
- time_t dt_s = mrg_metric_get_update_every_s(main_mrg, metric);
+ uint32_t dt_s = mrg_metric_get_update_every_s(main_mrg, metric);
if(!dt_s)
dt_s = default_rrd_update_every;
@@ -246,7 +246,7 @@ static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengin
time_t page_start_time_s = pgc_page_start_time_s(page);
time_t page_end_time_s = pgc_page_end_time_s(page);
- time_t page_update_every_s = pgc_page_update_every_s(page);
+ uint32_t page_update_every_s = pgc_page_update_every_s(page);
if(!page_update_every_s)
page_update_every_s = dt_s;
@@ -282,7 +282,7 @@ static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengin
pd->metric_id = metric_id;
pd->first_time_s = page_start_time_s;
pd->last_time_s = page_end_time_s;
- pd->update_every_s = (uint32_t) page_update_every_s;
+ pd->update_every_s = page_update_every_s;
pd->page = (open_cache_mode) ? NULL : page;
pd->status |= tags;
@@ -332,8 +332,8 @@ static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengin
static void pgc_inject_gap(struct rrdengine_instance *ctx, METRIC *metric, time_t start_time_s, time_t end_time_s) {
- time_t db_first_time_s, db_last_time_s, db_update_every_s;
- mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);
+ time_t db_first_time_s, db_last_time_s;
+ mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, NULL);
if(is_page_in_time_range(start_time_s, end_time_s, db_first_time_s, db_last_time_s) != PAGE_IS_IN_RANGE)
return;
@@ -491,7 +491,7 @@ static size_t list_has_time_gaps(
typedef void (*page_found_callback_t)(PGC_PAGE *page, void *data);
static size_t get_page_list_from_journal_v2(struct rrdengine_instance *ctx, METRIC *metric, usec_t start_time_ut, usec_t end_time_ut, page_found_callback_t callback, void *callback_data) {
- uuid_t *uuid = mrg_metric_uuid(main_mrg, metric);
+ nd_uuid_t *uuid = mrg_metric_uuid(main_mrg, metric);
Word_t metric_id = mrg_metric_id(main_mrg, metric);
time_t wanted_start_time_s = (time_t)(start_time_ut / USEC_PER_SEC);
@@ -547,7 +547,7 @@ static size_t get_page_list_from_journal_v2(struct rrdengine_instance *ctx, METR
if(prc == PAGE_IS_IN_THE_FUTURE)
break;
- time_t page_update_every_s = page_entry_in_journal->update_every_s;
+ uint32_t page_update_every_s = page_entry_in_journal->update_every_s;
size_t page_length = page_entry_in_journal->page_length;
if(datafile_acquire(datafile, DATAFILE_ACQUIRE_OPEN_CACHE)) { //for open cache item
@@ -567,7 +567,7 @@ static size_t get_page_list_from_journal_v2(struct rrdengine_instance *ctx, METR
.metric_id = metric_id,
.start_time_s = page_first_time_s,
.end_time_s = page_last_time_s,
- .update_every_s = (uint32_t) page_update_every_s,
+ .update_every_s = page_update_every_s,
.data = datafile,
.size = 0,
.custom_data = (uint8_t *) &ei,
@@ -845,7 +845,7 @@ struct pgc_page *pg_cache_lookup_next(
struct rrdengine_instance *ctx,
PDC *pdc,
time_t now_s,
- time_t last_update_every_s,
+ uint32_t last_update_every_s,
size_t *entries
) {
if (unlikely(!pdc))
@@ -905,7 +905,7 @@ struct pgc_page *pg_cache_lookup_next(
time_t page_start_time_s = pgc_page_start_time_s(page);
time_t page_end_time_s = pgc_page_end_time_s(page);
- time_t page_update_every_s = pgc_page_update_every_s(page);
+ uint32_t page_update_every_s = pgc_page_update_every_s(page);
if(unlikely(page_start_time_s == INVALID_TIME || page_end_time_s == INVALID_TIME)) {
__atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_zero_time_skipped, 1, __ATOMIC_RELAXED);
@@ -918,7 +918,7 @@ struct pgc_page *pg_cache_lookup_next(
if (unlikely(page_update_every_s <= 0 || page_update_every_s > 86400)) {
__atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_invalid_update_every_fixed, 1, __ATOMIC_RELAXED);
page_update_every_s = pgc_page_fix_update_every(page, last_update_every_s);
- pd->update_every_s = (uint32_t) page_update_every_s;
+ pd->update_every_s = page_update_every_s;
}
size_t entries_by_size = pgd_slots_used(pgc_page_data(page));
@@ -983,7 +983,7 @@ struct pgc_page *pg_cache_lookup_next(
return page;
}
-void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s, time_t end_time_s, time_t update_every_s,
+void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s, time_t end_time_s, uint32_t update_every_s,
struct rrdengine_datafile *datafile, uint64_t extent_offset, unsigned extent_size, uint32_t page_length) {
if(!datafile_acquire(datafile, DATAFILE_ACQUIRE_OPEN_CACHE)) // for open cache item
@@ -1003,7 +1003,7 @@ void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s
.metric_id = metric_id,
.start_time_s = start_time_s,
.end_time_s = end_time_s,
- .update_every_s = (uint32_t) update_every_s,
+ .update_every_s = update_every_s,
.size = 0,
.data = datafile,
.custom_data = (uint8_t *) &ext_io_data,
diff --git a/database/engine/pdc.c b/src/database/engine/pdc.c
index 5fe205e64..28a83e2bc 100644
--- a/database/engine/pdc.c
+++ b/src/database/engine/pdc.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#define NETDATA_RRD_INTERNALS
#include "pdc.h"
+#include "dbengine-compression.h"
struct extent_page_details_list {
uv_file file;
@@ -628,28 +629,29 @@ void collect_page_flags_to_buffer(BUFFER *wb, RRDENG_COLLECT_PAGE_FLAGS flags) {
buffer_strcat(wb, "STEP_UNALIGNED");
}
-inline VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_extent_page_descr *descr, time_t now_s, time_t overwrite_zero_update_every_s, bool have_read_error) {
+inline VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_extent_page_descr *descr, time_t now_s, uint32_t overwrite_zero_update_every_s, bool have_read_error) {
time_t start_time_s = (time_t) (descr->start_time_ut / USEC_PER_SEC);
- time_t end_time_s;
- size_t entries;
+ time_t end_time_s = 0;
+ size_t entries = 0;
switch (descr->type) {
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
end_time_s = descr->end_time_ut / USEC_PER_SEC;
entries = 0;
break;
- case PAGE_GORILLA_METRICS:
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT:
end_time_s = start_time_s + descr->gorilla.delta_time_s;
entries = descr->gorilla.entries;
break;
default:
- fatal("Unknown page type: %uc\n", descr->type);
+ // Nothing to do. Validate page will notify the user.
+ break;
}
return validate_page(
- (uuid_t *)descr->uuid,
+ (nd_uuid_t *)descr->uuid,
start_time_s,
end_time_s,
0,
@@ -663,32 +665,33 @@ inline VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_
}
VALIDATED_PAGE_DESCRIPTOR validate_page(
- uuid_t *uuid,
+ nd_uuid_t *uuid,
time_t start_time_s,
time_t end_time_s,
- time_t update_every_s, // can be zero, if unknown
+ uint32_t update_every_s, // can be zero, if unknown
size_t page_length,
uint8_t page_type,
size_t entries, // can be zero, if unknown
time_t now_s, // can be zero, to disable future timestamp check
- time_t overwrite_zero_update_every_s, // can be zero, if unknown
+ uint32_t overwrite_zero_update_every_s, // can be zero, if unknown
bool have_read_error,
const char *msg,
- RRDENG_COLLECT_PAGE_FLAGS flags) {
-
+ RRDENG_COLLECT_PAGE_FLAGS flags)
+{
VALIDATED_PAGE_DESCRIPTOR vd = {
.start_time_s = start_time_s,
.end_time_s = end_time_s,
.update_every_s = update_every_s,
.page_length = page_length,
+ .point_size = page_type_size[page_type],
.type = page_type,
.is_valid = true,
};
- vd.point_size = page_type_size[vd.type];
+ bool known_page_type = true;
switch (page_type) {
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
// always calculate entries by size
vd.entries = page_entries_by_size(vd.page_length, vd.point_size);
@@ -696,13 +699,13 @@ VALIDATED_PAGE_DESCRIPTOR validate_page(
if(!entries)
entries = vd.entries;
break;
- case PAGE_GORILLA_METRICS:
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT:
internal_fatal(entries == 0, "0 number of entries found on gorilla page");
vd.entries = entries;
break;
default:
- // TODO: should set vd.is_valid false instead?
- fatal("Unknown page type: %uc", page_type);
+ known_page_type = false;
+ break;
}
// allow to be called without update every (when loading pages from disk)
@@ -723,16 +726,16 @@ VALIDATED_PAGE_DESCRIPTOR validate_page(
// If gorilla can not compress the data we might end up needing slightly more
// than 4KiB. However, gorilla pages extend the page length by increments of
// 512 bytes.
- max_page_length += ((page_type == PAGE_GORILLA_METRICS) * GORILLA_BUFFER_SIZE);
+ max_page_length += ((page_type == RRDENG_PAGE_TYPE_GORILLA_32BIT) * RRDENG_GORILLA_32BIT_BUFFER_SIZE);
- if( have_read_error ||
+ if (!known_page_type ||
+ have_read_error ||
vd.page_length == 0 ||
vd.page_length > max_page_length ||
vd.start_time_s > vd.end_time_s ||
(now_s && vd.end_time_s > now_s) ||
vd.start_time_s <= 0 ||
vd.end_time_s <= 0 ||
- vd.update_every_s < 0 ||
(vd.start_time_s == vd.end_time_s && vd.entries > 1) ||
(vd.update_every_s == 0 && vd.entries > 1))
{
@@ -791,13 +794,13 @@ VALIDATED_PAGE_DESCRIPTOR validate_page(
nd_log_limit(&erl, NDLS_DAEMON, NDLP_ERR,
#endif
"DBENGINE: metric '%s' %s invalid page of type %u "
- "from %ld to %ld (now %ld), update every %ld, page length %zu, entries %zu (flags: %s)",
+ "from %ld to %ld (now %ld), update every %u, page length %zu, entries %zu (flags: %s)",
uuid_str, msg, vd.type,
vd.start_time_s, vd.end_time_s, now_s, vd.update_every_s, vd.page_length, vd.entries, wb?buffer_tostring(wb):""
);
}
else {
- const char *err_valid = (vd.is_valid) ? "" : "found invalid, ";
+ const char *err_valid = "";
const char *err_start = (vd.start_time_s == start_time_s) ? "" : "start time updated, ";
const char *err_end = (vd.end_time_s == end_time_s) ? "" : "end time updated, ";
const char *err_update = (vd.update_every_s == update_every_s) ? "" : "update every updated, ";
@@ -811,9 +814,9 @@ VALIDATED_PAGE_DESCRIPTOR validate_page(
nd_log_limit(&erl, NDLS_DAEMON, NDLP_ERR,
#endif
"DBENGINE: metric '%s' %s page of type %u "
- "from %ld to %ld (now %ld), update every %ld, page length %zu, entries %zu (flags: %s), "
+ "from %ld to %ld (now %ld), update every %u, page length %zu, entries %zu (flags: %s), "
"found inconsistent - the right is "
- "from %ld to %ld, update every %ld, page length %zu, entries %zu: "
+ "from %ld to %ld, update every %u, page length %zu, entries %zu: "
"%s%s%s%s%s%s%s",
uuid_str, msg, vd.type,
start_time_s, end_time_s, now_s, update_every_s, page_length, entries, wb?buffer_tostring(wb):"",
@@ -871,11 +874,11 @@ static void epdl_extent_loading_error_log(struct rrdengine_instance *ctx, EPDL *
if (descr) {
start_time_s = (time_t)(descr->start_time_ut / USEC_PER_SEC);
switch (descr->type) {
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
end_time_s = (time_t)(descr->end_time_ut / USEC_PER_SEC);
break;
- case PAGE_GORILLA_METRICS:
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT:
end_time_s = (time_t) start_time_s + (descr->gorilla.delta_time_s);
break;
}
@@ -895,7 +898,7 @@ static void epdl_extent_loading_error_log(struct rrdengine_instance *ctx, EPDL *
start_time_s = pd->first_time_s;
end_time_s = pd->last_time_s;
METRIC *metric = (METRIC *)pd->metric_id;
- uuid_t *u = mrg_metric_uuid(main_mrg, metric);
+ nd_uuid_t *u = mrg_metric_uuid(main_mrg, metric);
uuid_unparse_lower(*u, uuid);
used_epdl = true;
}
@@ -938,7 +941,6 @@ static bool epdl_populate_pages_from_extent_data(
PDC_PAGE_STATUS tags,
bool cached_extent)
{
- int ret;
unsigned i, count;
void *uncompressed_buf = NULL;
uint32_t payload_length, payload_offset, trailer_offset, uncompressed_payload_length = 0;
@@ -973,18 +975,17 @@ static bool epdl_populate_pages_from_extent_data(
if( !can_use_data ||
count < 1 ||
count > MAX_PAGES_PER_EXTENT ||
- (header->compression_algorithm != RRD_NO_COMPRESSION && header->compression_algorithm != RRD_LZ4) ||
+ !dbengine_valid_compression_algorithm(header->compression_algorithm) ||
(payload_length != trailer_offset - payload_offset) ||
(data_length != payload_offset + payload_length + sizeof(*trailer))
- ) {
+ ) {
epdl_extent_loading_error_log(ctx, epdl, NULL, "header is INVALID");
return false;
}
crc = crc32(0L, Z_NULL, 0);
crc = crc32(crc, data, epdl->extent_size - sizeof(*trailer));
- ret = crc32cmp(trailer->checksum, crc);
- if (unlikely(ret)) {
+ if (unlikely(crc32cmp(trailer->checksum, crc))) {
ctx_io_error(ctx);
have_read_error = true;
epdl_extent_loading_error_log(ctx, epdl, NULL, "CRC32 checksum FAILED");
@@ -993,14 +994,15 @@ static bool epdl_populate_pages_from_extent_data(
if(worker)
worker_is_busy(UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION);
- if (likely(!have_read_error && RRD_NO_COMPRESSION != header->compression_algorithm)) {
+ if (likely(!have_read_error && RRDENG_COMPRESSION_NONE != header->compression_algorithm)) {
// find the uncompressed extent size
uncompressed_payload_length = 0;
for (i = 0; i < count; ++i) {
size_t page_length = header->descr[i].page_length;
- if (page_length > RRDENG_BLOCK_SIZE && (header->descr[i].type != PAGE_GORILLA_METRICS ||
- (header->descr[i].type == PAGE_GORILLA_METRICS &&
- (page_length - RRDENG_BLOCK_SIZE) % GORILLA_BUFFER_SIZE))) {
+ if (page_length > RRDENG_BLOCK_SIZE &&
+ (header->descr[i].type != RRDENG_PAGE_TYPE_GORILLA_32BIT ||
+ (header->descr[i].type == RRDENG_PAGE_TYPE_GORILLA_32BIT &&
+ (page_length - RRDENG_BLOCK_SIZE) % RRDENG_GORILLA_32BIT_BUFFER_SIZE))) {
have_read_error = true;
break;
}
@@ -1015,11 +1017,16 @@ static bool epdl_populate_pages_from_extent_data(
eb = extent_buffer_get(uncompressed_payload_length);
uncompressed_buf = eb->data;
- ret = LZ4_decompress_safe(data + payload_offset, uncompressed_buf,
- (int) payload_length, (int) uncompressed_payload_length);
+ size_t bytes = dbengine_decompress(uncompressed_buf, data + payload_offset,
+ uncompressed_payload_length, payload_length,
+ header->compression_algorithm);
- __atomic_add_fetch(&ctx->stats.before_decompress_bytes, payload_length, __ATOMIC_RELAXED);
- __atomic_add_fetch(&ctx->stats.after_decompress_bytes, ret, __ATOMIC_RELAXED);
+ if(!bytes)
+ have_read_error = true;
+ else {
+ __atomic_add_fetch(&ctx->stats.before_decompress_bytes, payload_length, __ATOMIC_RELAXED);
+ __atomic_add_fetch(&ctx->stats.after_decompress_bytes, bytes, __ATOMIC_RELAXED);
+ }
}
}
@@ -1075,7 +1082,7 @@ static bool epdl_populate_pages_from_extent_data(
stats_load_invalid_page++;
}
else {
- if (RRD_NO_COMPRESSION == header->compression_algorithm) {
+ if (RRDENG_COMPRESSION_NONE == header->compression_algorithm) {
pgd = pgd_create_from_disk_data(header->descr[i].type,
data + payload_offset + page_offset,
vd.page_length);
@@ -1172,7 +1179,7 @@ static bool epdl_populate_pages_from_extent_data(
static inline void *datafile_extent_read(struct rrdengine_instance *ctx, uv_file file, unsigned pos, unsigned size_bytes)
{
- void *buffer;
+ void *buffer = NULL;
uv_fs_t request;
unsigned real_io_size = ALIGN_BYTES_CEILING(size_bytes);
diff --git a/database/engine/pdc.h b/src/database/engine/pdc.h
index 9bae39ade..9bae39ade 100644
--- a/database/engine/pdc.h
+++ b/src/database/engine/pdc.h
diff --git a/database/engine/rrddiskprotocol.h b/src/database/engine/rrddiskprotocol.h
index 86b41f0b3..dc1a4c980 100644
--- a/database/engine/rrddiskprotocol.h
+++ b/src/database/engine/rrddiskprotocol.h
@@ -19,13 +19,16 @@
#define UUID_SZ (16)
#define CHECKSUM_SZ (4) /* CRC32 */
-#define RRD_NO_COMPRESSION (0)
-#define RRD_LZ4 (1)
+#define RRDENG_COMPRESSION_NONE (0)
+#define RRDENG_COMPRESSION_LZ4 (1)
+#define RRDENG_COMPRESSION_ZSTD (2)
#define RRDENG_DF_SB_PADDING_SZ (RRDENG_BLOCK_SIZE - (RRDENG_MAGIC_SZ + RRDENG_VER_SZ + sizeof(uint8_t)))
+
/*
* Data file persistent super-block
*/
+
struct rrdeng_df_sb {
char magic_number[RRDENG_MAGIC_SZ];
char version[RRDENG_VER_SZ];
@@ -36,10 +39,11 @@ struct rrdeng_df_sb {
/*
* Page types
*/
-#define PAGE_METRICS (0)
-#define PAGE_TIER (1)
-#define PAGE_GORILLA_METRICS (2)
-#define PAGE_TYPE_MAX 2 // Maximum page type (inclusive)
+
+#define RRDENG_PAGE_TYPE_ARRAY_32BIT (0)
+#define RRDENG_PAGE_TYPE_ARRAY_TIER1 (1)
+#define RRDENG_PAGE_TYPE_GORILLA_32BIT (2)
+#define RRDENG_PAGE_TYPE_MAX (2) // Maximum page type (inclusive)
/*
* Data file page descriptor
diff --git a/database/engine/rrdengine.c b/src/database/engine/rrdengine.c
index b82cc1ad1..2d6583ead 100644
--- a/database/engine/rrdengine.c
+++ b/src/database/engine/rrdengine.c
@@ -3,6 +3,7 @@
#include "rrdengine.h"
#include "pdc.h"
+#include "dbengine-compression.h"
rrdeng_stats_t global_io_errors = 0;
rrdeng_stats_t global_fs_errors = 0;
@@ -10,7 +11,7 @@ rrdeng_stats_t rrdeng_reserved_file_descriptors = 0;
rrdeng_stats_t global_pg_cache_over_half_dirty_events = 0;
rrdeng_stats_t global_flushing_pressure_page_deletions = 0;
-unsigned rrdeng_pages_per_extent = MAX_PAGES_PER_EXTENT;
+unsigned rrdeng_pages_per_extent = DEFAULT_PAGES_PER_EXTENT;
#if WORKER_UTILIZATION_MAX_JOB_TYPES < (RRDENG_OPCODE_MAX + 2)
#error Please increase WORKER_UTILIZATION_MAX_JOB_TYPES to at least (RRDENG_MAX_OPCODE + 2)
@@ -39,6 +40,7 @@ struct rrdeng_main {
uv_loop_t loop;
uv_async_t async;
uv_timer_t timer;
+ uv_timer_t retention_timer;
pid_t tid;
bool shutdown;
@@ -110,16 +112,10 @@ static void sanity_check(void)
/* Data file super-block cannot be larger than RRDENG_BLOCK_SIZE */
BUILD_BUG_ON(RRDENG_DF_SB_PADDING_SZ < 0);
- BUILD_BUG_ON(sizeof(uuid_t) != UUID_SZ); /* check UUID size */
+ BUILD_BUG_ON(sizeof(nd_uuid_t) != UUID_SZ); /* check UUID size */
/* page count must fit in 8 bits */
BUILD_BUG_ON(MAX_PAGES_PER_EXTENT > 255);
-
- /* extent cache count must fit in 32 bits */
-// BUILD_BUG_ON(MAX_CACHED_EXTENTS > 32);
-
- /* page info scratch space must be able to hold 2 32-bit integers */
- BUILD_BUG_ON(sizeof(((struct rrdeng_page_info *)0)->scratch) < 2 * sizeof(uint32_t));
}
// ----------------------------------------------------------------------------
@@ -229,10 +225,10 @@ static void after_work_standard_callback(uv_work_t* req, int status) {
worker_is_idle();
}
-static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct completion *completion, enum rrdeng_opcode opcode, work_cb work_cb, after_work_cb after_work_cb) {
+static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct completion *completion, enum rrdeng_opcode opcode, work_cb do_work_cb, after_work_cb do_after_work_cb) {
struct rrdeng_work *work_request = NULL;
- internal_fatal(rrdeng_main.tid != gettid(), "work_dispatch() can only be run from the event loop thread");
+ internal_fatal(rrdeng_main.tid != gettid_cached(), "work_dispatch() can only be run from the event loop thread");
work_request = aral_mallocz(rrdeng_main.work_cmd.ar);
memset(work_request, 0, sizeof(struct rrdeng_work));
@@ -240,8 +236,8 @@ static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct com
work_request->ctx = ctx;
work_request->data = data;
work_request->completion = completion;
- work_request->work_cb = work_cb;
- work_request->after_work_cb = after_work_cb;
+ work_request->work_cb = do_work_cb;
+ work_request->after_work_cb = do_after_work_cb;
work_request->opcode = opcode;
if(uv_queue_work(&rrdeng_main.loop, &work_request->req, work_standard_worker, after_work_standard_callback)) {
@@ -772,13 +768,10 @@ static struct rrdengine_datafile *get_datafile_to_write_extent(struct rrdengine_
*/
static struct extent_io_descriptor *datafile_extent_build(struct rrdengine_instance *ctx, struct page_descr_with_data *base, struct completion *completion) {
int ret;
- int compressed_size, max_compressed_size = 0;
unsigned i, count, size_bytes, pos, real_io_size;
- uint32_t uncompressed_payload_length, payload_offset;
+ uint32_t uncompressed_payload_length, max_compressed_size, payload_offset;
struct page_descr_with_data *descr, *eligible_pages[MAX_PAGES_PER_EXTENT];
struct extent_io_descriptor *xt_io_descr;
- struct extent_buffer *eb = NULL;
- void *compressed_buf = NULL;
Word_t Index;
uint8_t compression_algorithm = ctx->config.global_compress_alg;
struct rrdengine_datafile *datafile;
@@ -807,20 +800,8 @@ static struct extent_io_descriptor *datafile_extent_build(struct rrdengine_insta
xt_io_descr = extent_io_descriptor_get();
xt_io_descr->ctx = ctx;
payload_offset = sizeof(*header) + count * sizeof(header->descr[0]);
- switch (compression_algorithm) {
- case RRD_NO_COMPRESSION:
- size_bytes = payload_offset + uncompressed_payload_length + sizeof(*trailer);
- break;
-
- default: /* Compress */
- fatal_assert(uncompressed_payload_length < LZ4_MAX_INPUT_SIZE);
- max_compressed_size = LZ4_compressBound(uncompressed_payload_length);
- eb = extent_buffer_get(max_compressed_size);
- compressed_buf = eb->data;
- size_bytes = payload_offset + MAX(uncompressed_payload_length, (unsigned)max_compressed_size) + sizeof(*trailer);
- break;
- }
-
+ max_compressed_size = dbengine_max_compressed_size(uncompressed_payload_length, compression_algorithm);
+ size_bytes = payload_offset + MAX(uncompressed_payload_length, max_compressed_size) + sizeof(*trailer);
ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes));
if (unlikely(ret)) {
fatal("DBENGINE: posix_memalign:%s", strerror(ret));
@@ -832,23 +813,22 @@ static struct extent_io_descriptor *datafile_extent_build(struct rrdengine_insta
pos = 0;
header = xt_io_descr->buf;
- header->compression_algorithm = compression_algorithm;
header->number_of_pages = count;
pos += sizeof(*header);
for (i = 0 ; i < count ; ++i) {
descr = xt_io_descr->descr_array[i];
header->descr[i].type = descr->type;
- uuid_copy(*(uuid_t *)header->descr[i].uuid, *descr->id);
+ uuid_copy(*(nd_uuid_t *)header->descr[i].uuid, *descr->id);
header->descr[i].page_length = descr->page_length;
header->descr[i].start_time_ut = descr->start_time_ut;
switch (descr->type) {
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
header->descr[i].end_time_ut = descr->end_time_ut;
break;
- case PAGE_GORILLA_METRICS:
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT:
header->descr[i].gorilla.delta_time_s = (uint32_t) ((descr->end_time_ut - descr->start_time_ut) / USEC_PER_SEC);
header->descr[i].gorilla.entries = pgd_slots_used(descr->pgd);
break;
@@ -858,29 +838,40 @@ static struct extent_io_descriptor *datafile_extent_build(struct rrdengine_insta
pos += sizeof(header->descr[i]);
}
+
+ // build the extent payload
for (i = 0 ; i < count ; ++i) {
descr = xt_io_descr->descr_array[i];
pgd_copy_to_extent(descr->pgd, xt_io_descr->buf + pos, descr->page_length);
pos += descr->page_length;
}
- if(likely(compression_algorithm == RRD_LZ4)) {
- compressed_size = LZ4_compress_default(
- xt_io_descr->buf + payload_offset,
- compressed_buf,
- (int)uncompressed_payload_length,
- max_compressed_size);
+ // compress the payload
+ size_t compressed_size =
+ (int)dbengine_compress(xt_io_descr->buf + payload_offset,
+ uncompressed_payload_length,
+ compression_algorithm);
- __atomic_add_fetch(&ctx->stats.before_compress_bytes, uncompressed_payload_length, __ATOMIC_RELAXED);
- __atomic_add_fetch(&ctx->stats.after_compress_bytes, compressed_size, __ATOMIC_RELAXED);
+ internal_fatal(compressed_size > max_compressed_size, "DBENGINE: compression returned more data than the max allowed");
+ internal_fatal(compressed_size > uncompressed_payload_length, "DBENGINE: compression returned more data than the uncompressed extent");
- (void) memcpy(xt_io_descr->buf + payload_offset, compressed_buf, compressed_size);
- extent_buffer_release(eb);
- size_bytes = payload_offset + compressed_size + sizeof(*trailer);
+ if(compressed_size) {
+ header->compression_algorithm = compression_algorithm;
header->payload_length = compressed_size;
}
- else { // RRD_NO_COMPRESSION
- header->payload_length = uncompressed_payload_length;
+ else {
+ // compression failed, or generated bigger pages
+ // so it didn't touch our uncompressed buffer
+ header->compression_algorithm = RRDENG_COMPRESSION_NONE;
+ header->payload_length = compressed_size = uncompressed_payload_length;
+ }
+
+ // set the correct size
+ size_bytes = payload_offset + compressed_size + sizeof(*trailer);
+
+ if(compression_algorithm != RRDENG_COMPRESSION_NONE) {
+ __atomic_add_fetch(&ctx->stats.before_compress_bytes, uncompressed_payload_length, __ATOMIC_RELAXED);
+ __atomic_add_fetch(&ctx->stats.after_compress_bytes, compressed_size, __ATOMIC_RELAXED);
}
real_io_size = ALIGN_BYTES_CEILING(size_bytes);
@@ -939,7 +930,7 @@ static void after_database_rotate(struct rrdengine_instance *ctx __maybe_unused,
}
struct uuid_first_time_s {
- uuid_t *uuid;
+ nd_uuid_t *uuid;
time_t first_time_s;
METRIC *metric;
size_t pages_found;
@@ -1171,7 +1162,17 @@ static void update_metrics_first_time_s(struct rrdengine_instance *ctx, struct r
for (size_t index = 0; index < added; ++index) {
uuid_first_t_entry = &uuid_first_entry_list[index];
if (likely(uuid_first_t_entry->first_time_s != LONG_MAX)) {
- mrg_metric_set_first_time_s_if_bigger(main_mrg, uuid_first_t_entry->metric, uuid_first_t_entry->first_time_s);
+
+ time_t old_first_time_s = mrg_metric_get_first_time_s(main_mrg, uuid_first_t_entry->metric);
+
+ bool changed = mrg_metric_set_first_time_s_if_bigger(main_mrg, uuid_first_t_entry->metric, uuid_first_t_entry->first_time_s);
+ if (changed) {
+ uint32_t update_every_s = mrg_metric_get_update_every_s(main_mrg, uuid_first_t_entry->metric);
+ if (update_every_s && old_first_time_s && uuid_first_t_entry->first_time_s > old_first_time_s) {
+ uint64_t remove_samples = (uuid_first_t_entry->first_time_s - old_first_time_s) / update_every_s;
+ __atomic_sub_fetch(&ctx->atomic.samples, remove_samples, __ATOMIC_RELAXED);
+ }
+ }
mrg_metric_release(main_mrg, uuid_first_t_entry->metric);
}
else {
@@ -1180,6 +1181,14 @@ static void update_metrics_first_time_s(struct rrdengine_instance *ctx, struct r
// there is no retention for this metric
bool has_retention = mrg_metric_zero_disk_retention(main_mrg, uuid_first_t_entry->metric);
if (!has_retention) {
+ time_t first_time_s = mrg_metric_get_first_time_s(main_mrg, uuid_first_t_entry->metric);
+ time_t last_time_s = mrg_metric_get_latest_time_s(main_mrg, uuid_first_t_entry->metric);
+ time_t update_every_s = mrg_metric_get_update_every_s(main_mrg, uuid_first_t_entry->metric);
+ if (update_every_s && first_time_s && last_time_s) {
+ uint64_t remove_samples = (first_time_s - last_time_s) / update_every_s;
+ __atomic_sub_fetch(&ctx->atomic.samples, remove_samples, __ATOMIC_RELAXED);
+ }
+
bool deleted = mrg_metric_release_and_delete(main_mrg, uuid_first_t_entry->metric);
if(deleted)
deleted_metrics++;
@@ -1280,7 +1289,7 @@ void datafile_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile *
static void *database_rotate_tp_worker(struct rrdengine_instance *ctx __maybe_unused, void *data __maybe_unused, struct completion *completion __maybe_unused, uv_work_t *uv_work_req __maybe_unused) {
datafile_delete(ctx, ctx->datafiles.first, ctx_is_available_for_queries(ctx), true);
- if (rrdeng_ctx_exceeded_disk_quota(ctx))
+ if (rrdeng_ctx_tier_cap_exceeded(ctx))
rrdeng_enq_cmd(ctx, RRDENG_OPCODE_DATABASE_ROTATE, NULL, NULL, STORAGE_PRIORITY_INTERNAL_DBENGINE, NULL, NULL);
rrdcontext_db_rotation();
@@ -1352,8 +1361,7 @@ static void *ctx_shutdown_tp_worker(struct rrdengine_instance *ctx __maybe_unuse
if(!logged) {
logged = true;
netdata_log_info("DBENGINE: waiting for %zu inflight queries to finish to shutdown tier %d...",
- __atomic_load_n(&ctx->atomic.inflight_queries, __ATOMIC_RELAXED),
- (ctx->config.legacy) ? -1 : ctx->config.tier);
+ __atomic_load_n(&ctx->atomic.inflight_queries, __ATOMIC_RELAXED), ctx->config.tier);
}
sleep_usec(1 * USEC_PER_MS);
}
@@ -1390,26 +1398,27 @@ static void *query_prep_tp_worker(struct rrdengine_instance *ctx __maybe_unused,
}
uint64_t rrdeng_target_data_file_size(struct rrdengine_instance *ctx) {
- uint64_t target_size = ctx->config.max_disk_space / TARGET_DATAFILES;
+ uint64_t target_size = ctx->config.max_disk_space ? ctx->config.max_disk_space / TARGET_DATAFILES : MAX_DATAFILE_SIZE;
target_size = MIN(target_size, MAX_DATAFILE_SIZE);
target_size = MAX(target_size, MIN_DATAFILE_SIZE);
return target_size;
}
-bool rrdeng_ctx_exceeded_disk_quota(struct rrdengine_instance *ctx)
+time_t get_datafile_end_time(struct rrdengine_instance *ctx)
{
- if(!ctx->datafiles.first)
- // no datafiles available
- return false;
+ time_t last_time_s = 0;
- if(!ctx->datafiles.first->next)
- // only 1 datafile available
- return false;
+ uv_rwlock_rdlock(&ctx->datafiles.rwlock);
+ struct rrdengine_datafile *datafile = ctx->datafiles.first;
- uint64_t estimated_disk_space = ctx_current_disk_space_get(ctx) + rrdeng_target_data_file_size(ctx) -
- (ctx->datafiles.first->prev ? ctx->datafiles.first->prev->pos : 0);
+ if (datafile) {
+ last_time_s = datafile->journalfile->v2.last_time_s;
+ if (!last_time_s)
+ last_time_s = datafile->journalfile->v2.first_time_s;
+ }
- return estimated_disk_space > ctx->config.max_disk_space;
+ uv_rwlock_rdunlock(&ctx->datafiles.rwlock);
+ return last_time_s;
}
/* return 0 on success */
@@ -1580,6 +1589,80 @@ static void *cleanup_tp_worker(struct rrdengine_instance *ctx __maybe_unused, vo
return data;
}
+uint64_t get_used_disk_space(struct rrdengine_instance *ctx)
+{
+ uint64_t active_space = 0;
+
+ if (ctx->datafiles.first && ctx->datafiles.first->prev)
+ active_space = ctx->datafiles.first->prev->pos;
+
+ uint64_t estimated_disk_space = ctx_current_disk_space_get(ctx) + rrdeng_target_data_file_size(ctx) - active_space;
+
+ uint64_t database_space = get_total_database_space();
+ uint64_t adjusted_database_space = database_space * ctx->config.disk_percentage / 100 ;
+ estimated_disk_space += adjusted_database_space;
+
+ return estimated_disk_space;
+}
+
+static time_t get_tier_retention(struct rrdengine_instance *ctx)
+{
+ time_t retention = 0;
+ if (localhost) {
+ STORAGE_ENGINE *eng = localhost->db[ctx->config.tier].eng;
+ if (eng) {
+ time_t first_time_s = get_datafile_end_time(ctx);
+ if (first_time_s)
+ retention = now_realtime_sec() - first_time_s;
+ }
+ }
+ return retention;
+}
+
+// Check if disk or retention time cap reached
+bool rrdeng_ctx_tier_cap_exceeded(struct rrdengine_instance *ctx)
+{
+ if(!ctx->datafiles.first)
+ // no datafiles available
+ return false;
+
+ if(!ctx->datafiles.first->next)
+ // only 1 datafile available
+ return false;
+
+ uint64_t estimated_disk_space = get_used_disk_space(ctx);
+ time_t retention = get_tier_retention(ctx);
+
+ if (ctx->config.max_retention_s && retention > ctx->config.max_retention_s)
+ return true;
+
+ if (ctx->config.max_disk_space && estimated_disk_space > ctx->config.max_disk_space)
+ return true;
+
+ return false;
+}
+
+void retention_timer_cb(uv_timer_t *handle)
+{
+ if (!localhost)
+ return;
+
+ worker_is_busy(RRDENG_TIMER_CB);
+ uv_stop(handle->loop);
+ uv_update_time(handle->loop);
+
+ for (size_t tier = 0; tier < storage_tiers; tier++) {
+ STORAGE_ENGINE *eng = localhost->db[tier].eng;
+ if (!eng || eng->seb != STORAGE_ENGINE_BACKEND_DBENGINE)
+ continue;
+ bool cleanup = rrdeng_ctx_tier_cap_exceeded(multidb_ctx[tier]);
+ if (cleanup)
+ rrdeng_enq_cmd(multidb_ctx[tier], RRDENG_OPCODE_DATABASE_ROTATE, NULL, NULL, STORAGE_PRIORITY_INTERNAL_DBENGINE, NULL, NULL);
+ }
+
+ worker_is_idle();
+}
+
void timer_cb(uv_timer_t* handle) {
worker_is_busy(RRDENG_TIMER_CB);
uv_stop(handle->loop);
@@ -1643,7 +1726,17 @@ bool rrdeng_dbengine_spawn(struct rrdengine_instance *ctx __maybe_unused) {
fatal_assert(0 == uv_loop_close(&rrdeng_main.loop));
return false;
}
+
+ ret = uv_timer_init(&rrdeng_main.loop, &rrdeng_main.retention_timer);
+ if (ret) {
+ netdata_log_error("DBENGINE: uv_timer_init(): %s", uv_strerror(ret));
+ uv_close((uv_handle_t *)&rrdeng_main.async, NULL);
+ fatal_assert(0 == uv_loop_close(&rrdeng_main.loop));
+ return false;
+ }
+
rrdeng_main.timer.data = &rrdeng_main;
+ rrdeng_main.retention_timer.data = &rrdeng_main;
dbengine_initialize_structures();
@@ -1675,9 +1768,125 @@ static inline void worker_dispatch_query_prep(struct rrdeng_cmd cmd, bool from_w
work_dispatch(ctx, pdc, NULL, cmd.opcode, query_prep_tp_worker, NULL);
}
+uint64_t get_directory_free_bytes_space(struct rrdengine_instance *ctx)
+{
+ uint64_t free_bytes = 0;
+ struct statvfs buff_statvfs;
+ if (statvfs(ctx->config.dbfiles_path, &buff_statvfs) == 0)
+ free_bytes = buff_statvfs.f_bavail * buff_statvfs.f_bsize;
+
+ return (free_bytes - (free_bytes * 5 / 100));
+}
+
+void calculate_tier_disk_space_percentage(void)
+{
+ uint64_t tier_space[RRD_STORAGE_TIERS];
+
+ if (!localhost)
+ return;
+
+ uint64_t total_diskspace = 0;
+ for(size_t tier = 0; tier < storage_tiers ;tier++) {
+ STORAGE_ENGINE *eng = localhost->db[tier].eng;
+ if (!eng || eng->seb != STORAGE_ENGINE_BACKEND_DBENGINE) {
+ tier_space[tier] = 0;
+ continue;
+ }
+ uint64_t tier_disk_space = multidb_ctx[tier]->config.max_disk_space ?
+ multidb_ctx[tier]->config.max_disk_space :
+ get_directory_free_bytes_space(multidb_ctx[tier]);
+ total_diskspace += tier_disk_space;
+ tier_space[tier] = tier_disk_space;
+ }
+
+ if (total_diskspace) {
+ for (size_t tier = 0; tier < storage_tiers; tier++) {
+ multidb_ctx[tier]->config.disk_percentage = (100 * tier_space[tier] / total_diskspace);
+ }
+ }
+}
+
// Publish per-tier "dbengine retention" charts on localhost, showing the
// percentage of the disk space cap and of the retention time cap used by
// each dbengine tier. Charts and dimensions are created once (on the
// first call); subsequent calls only update the values.
void dbengine_retention_statistics(void)
{
    static bool init = false;
    static DBENGINE_TIER_STATS stats[RRD_STORAGE_TIERS];

    if (!localhost)
        return;

    // refresh each tier's share of the metadata database space first,
    // since get_used_disk_space() below depends on it
    calculate_tier_disk_space_percentage();

    for (size_t tier = 0; tier < storage_tiers; tier++) {
        STORAGE_ENGINE *eng = localhost->db[tier].eng;
        if (!eng || eng->seb != STORAGE_ENGINE_BACKEND_DBENGINE)
            continue;

        if (init == false) {
            // first call: create one chart per dbengine tier
            char id[200];
            snprintfz(id, sizeof(id) - 1, "dbengine_retention_tier%zu", tier);
            stats[tier].st = rrdset_create_localhost(
                "netdata",
                id,
                NULL,
                "dbengine retention",
                "netdata.dbengine_tier_retention",
                "dbengine space and time retention",
                "%",
                "netdata",
                "stats",
                134900, // before "dbengine memory" (dbengine2_statistics_charts)
                10,
                RRDSET_TYPE_LINE);

            stats[tier].rd_space = rrddim_add(stats[tier].st, "space", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
            stats[tier].rd_time = rrddim_add(stats[tier].st, "time", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

            // label the chart with its tier number so charts can be filtered
            char tier_str[5];
            snprintfz(tier_str, 4, "%zu", tier);
            rrdlabels_add(stats[tier].st->rrdlabels, "tier", tier_str, RRDLABEL_SRC_AUTO);

            rrdset_flag_set(stats[tier].st, RRDSET_FLAG_METADATA_UPDATE);
            rrdhost_flag_set(stats[tier].st->rrdhost, RRDHOST_FLAG_METADATA_UPDATE);
            rrdset_metadata_updated(stats[tier].st);
        }

        time_t first_time_s = storage_engine_global_first_time_s(eng->seb, localhost->db[tier].si);
        time_t retention = first_time_s ? now_realtime_sec() - first_time_s : 0;

        // Note: storage_engine_disk_space_used() reports the exact disk space
        // used (as shown by api/v2/node_instances), while get_used_disk_space()
        // is the value used to decide whether cleanup (datafile rotation)
        // should happen: it additionally counts the desired full size of the
        // active datafile, so the chart matches the rotation decision.
        uint64_t disk_space = get_used_disk_space(multidb_ctx[tier]);
        //uint64_t disk_space = storage_engine_disk_space_used(eng->seb, localhost->db[tier].si);

        // when no cap is configured, use "free space + used" as the 100% mark
        uint64_t config_disk_space = storage_engine_disk_space_max(eng->seb, localhost->db[tier].si);
        if (!config_disk_space) {
            config_disk_space = get_directory_free_bytes_space(multidb_ctx[tier]);
            config_disk_space += disk_space;
        }

        collected_number disk_percentage = (collected_number) (config_disk_space ? 100 * disk_space / config_disk_space : 0);

        // NOTE(review): the cast binds to max_retention_s (the ternary
        // condition), not the result; harmless, but parenthesizing would be
        // clearer - confirm intent before touching
        collected_number retention_percentage = (collected_number)multidb_ctx[tier]->config.max_retention_s ?
                                                    100 * retention / multidb_ctx[tier]->config.max_retention_s :
                                                    0;

        if (retention_percentage > 100)
            retention_percentage = 100;

        rrddim_set_by_pointer(stats[tier].st, stats[tier].rd_space, (collected_number) disk_percentage);
        rrddim_set_by_pointer(stats[tier].st, stats[tier].rd_time, (collected_number) retention_percentage);

        rrdset_done(stats[tier].st);
    }
    init = true;
}
+
void dbengine_event_loop(void* arg) {
sanity_check();
- uv_thread_set_name_np(pthread_self(), "DBENGINE");
+ uv_thread_set_name_np("DBENGINE");
service_register(SERVICE_THREAD_TYPE_EVENT_LOOP, NULL, NULL, NULL, true);
worker_register("DBENGINE");
@@ -1721,9 +1930,10 @@ void dbengine_event_loop(void* arg) {
struct rrdeng_main *main = arg;
enum rrdeng_opcode opcode;
struct rrdeng_cmd cmd;
- main->tid = gettid();
+ main->tid = gettid_cached();
fatal_assert(0 == uv_timer_start(&main->timer, timer_cb, TIMER_PERIOD_MS, TIMER_PERIOD_MS));
+ fatal_assert(0 == uv_timer_start(&main->retention_timer, retention_timer_cb, TIMER_PERIOD_MS * 60, TIMER_PERIOD_MS * 60));
bool shutdown = false;
while (likely(!shutdown)) {
@@ -1804,7 +2014,7 @@ void dbengine_event_loop(void* arg) {
if (!__atomic_load_n(&ctx->atomic.now_deleting_files, __ATOMIC_RELAXED) &&
ctx->datafiles.first->next != NULL &&
ctx->datafiles.first->next->next != NULL &&
- rrdeng_ctx_exceeded_disk_quota(ctx)) {
+ rrdeng_ctx_tier_cap_exceeded(ctx)) {
__atomic_store_n(&ctx->atomic.now_deleting_files, true, __ATOMIC_RELAXED);
work_dispatch(ctx, NULL, NULL, opcode, database_rotate_tp_worker, after_database_rotate);
@@ -1841,7 +2051,11 @@ void dbengine_event_loop(void* arg) {
uv_close((uv_handle_t *)&main->async, NULL);
(void) uv_timer_stop(&main->timer);
uv_close((uv_handle_t *)&main->timer, NULL);
+
+ (void) uv_timer_stop(&main->retention_timer);
+ uv_close((uv_handle_t *)&main->retention_timer, NULL);
shutdown = true;
+ break;
}
case RRDENG_OPCODE_NOOP: {
diff --git a/database/engine/rrdengine.h b/src/database/engine/rrdengine.h
index cd3352f12..37ea92b8a 100644
--- a/database/engine/rrdengine.h
+++ b/src/database/engine/rrdengine.h
@@ -3,9 +3,6 @@
#ifndef NETDATA_RRDENGINE_H
#define NETDATA_RRDENGINE_H
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
#include <fcntl.h>
#include <lz4.h>
#include <Judy.h>
@@ -30,11 +27,18 @@ extern unsigned rrdeng_pages_per_extent;
struct rrdengine_instance;
struct rrdeng_cmd;
-#define MAX_PAGES_PER_EXTENT (64) /* TODO: can go higher only when journal supports bigger than 4KiB transactions */
+#define MAX_PAGES_PER_EXTENT (109) /* TODO: can go higher only when journal supports bigger than 4KiB transactions */
+#define DEFAULT_PAGES_PER_EXTENT (64)
#define RRDENG_FILE_NUMBER_SCAN_TMPL "%1u-%10u"
#define RRDENG_FILE_NUMBER_PRINT_TMPL "%1.1u-%10.10u"
+typedef struct dbengine_tier_stats {
+ RRDSET *st;
+ RRDDIM *rd_space;
+ RRDDIM *rd_time;
+} DBENGINE_TIER_STATS;
+
typedef enum __attribute__ ((__packed__)) {
// final status for all pages
// if a page does not have one of these, it is considered unroutable
@@ -142,7 +146,7 @@ struct jv2_extents_info {
};
struct jv2_metrics_info {
- uuid_t *uuid;
+ nd_uuid_t *uuid;
uint32_t page_list_header;
time_t first_time_s;
time_t last_time_s;
@@ -153,9 +157,9 @@ struct jv2_metrics_info {
struct jv2_page_info {
time_t start_time_s;
time_t end_time_s;
- time_t update_every_s;
- size_t page_length;
+ uint32_t update_every_s;
uint32_t extent_index;
+ size_t page_length;
void *custom_data;
// private
@@ -217,7 +221,7 @@ struct rrdeng_query_handle {
// internal data
time_t now_s;
- time_t dt_s;
+ uint32_t dt_s;
unsigned position;
unsigned entries;
@@ -348,18 +352,25 @@ extern rrdeng_stats_t rrdeng_reserved_file_descriptors;
extern rrdeng_stats_t global_pg_cache_over_half_dirty_events;
extern rrdeng_stats_t global_flushing_pressure_page_deletions; /* number of deleted pages */
-struct rrdengine_instance {
- struct {
- bool legacy; // true when the db is autonomous for a single host
-
- int tier; // the tier of this ctx
- uint8_t page_type; // default page type for this context
+typedef struct tier_config_prototype {
+ int tier; // the tier of this ctx
+ uint8_t page_type; // default page type for this context
+ uint64_t max_disk_space; // the max disk space this ctx is allowed to use
+ time_t max_retention_s; // The max retention in seconds
+ uint8_t disk_percentage; // percentage of metadata that contribute towards tier space used
+ uint8_t global_compress_alg; // the wanted compression algorithm
+ char dbfiles_path[FILENAME_MAX + 1];
- uint64_t max_disk_space; // the max disk space this ctx is allowed to use
- uint8_t global_compress_alg; // the wanted compression algorithm
+ struct {
+ uint32_t uses;
+ bool enabled;
+ bool is_on_disk;
+ SPINLOCK spinlock;
+ } _internal;
+} TIER_CONFIG_PROTOTYPE;
- char dbfiles_path[FILENAME_MAX + 1];
- } config;
+struct rrdengine_instance {
+ TIER_CONFIG_PROTOTYPE config;
struct {
uv_rwlock_t rwlock; // the linked list of datafiles is protected by this lock
@@ -387,6 +398,8 @@ struct rrdengine_instance {
unsigned extents_currently_being_flushed; // non-zero until we commit data to disk (both datafile and journal file)
time_t first_time_s;
+ uint64_t metrics;
+ uint64_t samples;
} atomic;
struct {
@@ -450,7 +463,7 @@ static inline void ctx_last_flush_fileno_set(struct rrdengine_instance *ctx, uns
void *dbengine_extent_alloc(size_t size);
void dbengine_extent_free(void *extent, size_t size);
-bool rrdeng_ctx_exceeded_disk_quota(struct rrdengine_instance *ctx);
+bool rrdeng_ctx_tier_cap_exceeded(struct rrdengine_instance *ctx);
int init_rrd_files(struct rrdengine_instance *ctx);
void finalize_rrd_files(struct rrdengine_instance *ctx);
bool rrdeng_dbengine_spawn(struct rrdengine_instance *ctx);
@@ -482,7 +495,7 @@ struct page_descr_with_data *page_descriptor_get(void);
typedef struct validated_page_descriptor {
time_t start_time_s;
time_t end_time_s;
- time_t update_every_s;
+ uint32_t update_every_s;
size_t page_length;
size_t point_size;
size_t entries;
@@ -496,19 +509,19 @@ typedef struct validated_page_descriptor {
#define page_entries_by_size(page_length_in_bytes, point_size_in_bytes) \
((page_length_in_bytes) / (point_size_in_bytes))
-VALIDATED_PAGE_DESCRIPTOR validate_page(uuid_t *uuid,
+VALIDATED_PAGE_DESCRIPTOR validate_page(nd_uuid_t *uuid,
time_t start_time_s,
time_t end_time_s,
- time_t update_every_s,
+ uint32_t update_every_s,
size_t page_length,
uint8_t page_type,
size_t entries,
time_t now_s,
- time_t overwrite_zero_update_every_s,
+ uint32_t overwrite_zero_update_every_s,
bool have_read_error,
const char *msg,
RRDENG_COLLECT_PAGE_FLAGS flags);
-VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_extent_page_descr *descr, time_t now_s, time_t overwrite_zero_update_every_s, bool have_read_error);
+VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_extent_page_descr *descr, time_t now_s, uint32_t overwrite_zero_update_every_s, bool have_read_error);
void collect_page_flags_to_buffer(BUFFER *wb, RRDENG_COLLECT_PAGE_FLAGS flags);
typedef enum {
@@ -525,8 +538,22 @@ static inline time_t max_acceptable_collected_time(void) {
void datafile_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile, bool update_retention, bool worker);
+// --------------------------------------------------------------------------------------------------------------------
+// the following functions are used to sort UUIDs in the journal files
+// DO NOT CHANGE, as this will break backwards compatibility with the data files users have.
+
// Raw byte-wise UUID comparison (memcmp order). This ordering is baked into
// the on-disk journal v2 file format - see the warning above: DO NOT CHANGE.
static inline int journal_uuid_memcmp(const nd_uuid_t *uu1, const nd_uuid_t *uu2) {
    return memcmp(uu1, uu2, sizeof(nd_uuid_t));
}
+
static inline int journal_metric_uuid_compare(const void *key, const void *metric) {
- return uuid_memcmp((uuid_t *)key, &(((struct journal_metric_list *) metric)->uuid));
+ return journal_uuid_memcmp((const nd_uuid_t *)key, (const nd_uuid_t *)&(((struct journal_metric_list *) metric)->uuid));
}
+// --------------------------------------------------------------------------------------------------------------------
+uint64_t get_used_disk_space(struct rrdengine_instance *ctx);
+void calculate_tier_disk_space_percentage(void);
+void dbengine_retention_statistics(void);
+uint64_t get_directory_free_bytes_space(struct rrdengine_instance *ctx);
+
#endif /* NETDATA_RRDENGINE_H */
diff --git a/database/engine/rrdengineapi.c b/src/database/engine/rrdengineapi.c
index 1ddce5243..b490e819a 100755
--- a/database/engine/rrdengineapi.c
+++ b/src/database/engine/rrdengineapi.c
@@ -2,49 +2,69 @@
#include "database/engine/rrddiskprotocol.h"
#include "rrdengine.h"
+#include "dbengine-compression.h"
/* Default global database instance */
-struct rrdengine_instance multidb_ctx_storage_tier0;
-struct rrdengine_instance multidb_ctx_storage_tier1;
-struct rrdengine_instance multidb_ctx_storage_tier2;
-struct rrdengine_instance multidb_ctx_storage_tier3;
-struct rrdengine_instance multidb_ctx_storage_tier4;
+struct rrdengine_instance multidb_ctx_storage_tier0 = { 0 };
+struct rrdengine_instance multidb_ctx_storage_tier1 = { 0 };
+struct rrdengine_instance multidb_ctx_storage_tier2 = { 0 };
+struct rrdengine_instance multidb_ctx_storage_tier3 = { 0 };
+struct rrdengine_instance multidb_ctx_storage_tier4 = { 0 };
#define mrg_metric_ctx(metric) (struct rrdengine_instance *)mrg_metric_section(main_mrg, metric)
#if RRD_STORAGE_TIERS != 5
#error RRD_STORAGE_TIERS is not 5 - you need to add allocations here
#endif
-struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS];
-uint8_t tier_page_type[RRD_STORAGE_TIERS] = {PAGE_METRICS, PAGE_TIER, PAGE_TIER, PAGE_TIER, PAGE_TIER};
+struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS] = { 0 };
+uint8_t tier_page_type[RRD_STORAGE_TIERS] = {
+ RRDENG_PAGE_TYPE_GORILLA_32BIT,
+ RRDENG_PAGE_TYPE_ARRAY_TIER1,
+ RRDENG_PAGE_TYPE_ARRAY_TIER1,
+ RRDENG_PAGE_TYPE_ARRAY_TIER1,
+ RRDENG_PAGE_TYPE_ARRAY_TIER1};
#if defined(ENV32BIT)
size_t tier_page_size[RRD_STORAGE_TIERS] = {2048, 1024, 192, 192, 192};
+size_t tier_quota_mb[RRD_STORAGE_TIERS] = {512, 512, 512, 0, 0};
#else
size_t tier_page_size[RRD_STORAGE_TIERS] = {4096, 2048, 384, 384, 384};
+size_t tier_quota_mb[RRD_STORAGE_TIERS] = {1024, 1024, 1024, 128, 64};
#endif
-#if PAGE_TYPE_MAX != 2
+#if RRDENG_PAGE_TYPE_MAX != 2
#error PAGE_TYPE_MAX is not 2 - you need to add allocations here
#endif
size_t page_type_size[256] = {
- [PAGE_METRICS] = sizeof(storage_number),
- [PAGE_TIER] = sizeof(storage_number_tier1_t),
- [PAGE_GORILLA_METRICS] = sizeof(storage_number)
+ [RRDENG_PAGE_TYPE_ARRAY_32BIT] = sizeof(storage_number),
+ [RRDENG_PAGE_TYPE_ARRAY_TIER1] = sizeof(storage_number_tier1_t),
+ [RRDENG_PAGE_TYPE_GORILLA_32BIT] = sizeof(storage_number)
};
// Zero a dbengine instance and initialize its locks, leaving it in a known
// clean state before it is configured and used.
static inline void initialize_single_ctx(struct rrdengine_instance *ctx) {
    memset(ctx, 0, sizeof(*ctx));
    uv_rwlock_init(&ctx->datafiles.rwlock);
    rw_spinlock_init(&ctx->njfv2idx.spinlock);
}
+
__attribute__((constructor)) void initialize_multidb_ctx(void) {
multidb_ctx[0] = &multidb_ctx_storage_tier0;
multidb_ctx[1] = &multidb_ctx_storage_tier1;
multidb_ctx[2] = &multidb_ctx_storage_tier2;
multidb_ctx[3] = &multidb_ctx_storage_tier3;
multidb_ctx[4] = &multidb_ctx_storage_tier4;
+
+ for(int i = 0; i < RRD_STORAGE_TIERS ; i++)
+ initialize_single_ctx(multidb_ctx[i]);
}
int db_engine_journal_check = 0;
-int default_rrdeng_disk_quota_mb = 256;
-int default_multidb_disk_quota_mb = 256;
+bool new_dbengine_defaults = false;
+bool legacy_multihost_db_space = false;
+int default_rrdeng_disk_quota_mb = RRDENG_DEFAULT_TIER_DISK_SPACE_MB;
+int default_multidb_disk_quota_mb = RRDENG_DEFAULT_TIER_DISK_SPACE_MB;
+RRD_BACKFILL default_backfill = RRD_BACKFILL_NEW;
#if defined(ENV32BIT)
int default_rrdeng_page_cache_mb = 16;
@@ -74,14 +94,14 @@ static inline bool rrdeng_page_alignment_release(struct pg_alignment *pa) {
}
// charts call this
-STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid __maybe_unused) {
+STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *si __maybe_unused, nd_uuid_t *uuid __maybe_unused) {
struct pg_alignment *pa = callocz(1, sizeof(struct pg_alignment));
rrdeng_page_alignment_acquire(pa);
return (STORAGE_METRICS_GROUP *)pa;
}
// charts call this
-void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance __maybe_unused, STORAGE_METRICS_GROUP *smg) {
+void rrdeng_metrics_group_release(STORAGE_INSTANCE *si __maybe_unused, STORAGE_METRICS_GROUP *smg) {
if(unlikely(!smg)) return;
struct pg_alignment *pa = (struct pg_alignment *)smg;
@@ -92,51 +112,43 @@ void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance __maybe_unused,
// metric handle for legacy dbs
/* This UUID is not unique across hosts */
-void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_t *ret_uuid)
+void rrdeng_generate_unittest_uuid(const char *dim_id, const char *chart_id, nd_uuid_t *ret_uuid)
{
- EVP_MD_CTX *evpctx;
- unsigned char hash_value[EVP_MAX_MD_SIZE];
- unsigned int hash_len;
-
- evpctx = EVP_MD_CTX_create();
- EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
- EVP_DigestUpdate(evpctx, dim_id, strlen(dim_id));
- EVP_DigestUpdate(evpctx, chart_id, strlen(chart_id));
- EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
- EVP_MD_CTX_destroy(evpctx);
- fatal_assert(hash_len > sizeof(uuid_t));
- memcpy(ret_uuid, hash_value, sizeof(uuid_t));
+ CLEAN_BUFFER *wb = buffer_create(100, NULL);
+ buffer_sprintf(wb,"%s.%s", dim_id, chart_id);
+ ND_UUID uuid = UUID_generate_from_hash(buffer_tostring(wb), buffer_strlen(wb));
+ uuid_copy(*ret_uuid, uuid.uuid);
}
-static METRIC *rrdeng_metric_get_legacy(STORAGE_INSTANCE *db_instance, const char *rd_id, const char *st_id) {
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
- uuid_t legacy_uuid;
- rrdeng_generate_legacy_uuid(rd_id, st_id, &legacy_uuid);
+static METRIC *rrdeng_metric_unittest(STORAGE_INSTANCE *si, const char *rd_id, const char *st_id) {
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
+ nd_uuid_t legacy_uuid;
+ rrdeng_generate_unittest_uuid(rd_id, st_id, &legacy_uuid);
return mrg_metric_get_and_acquire(main_mrg, &legacy_uuid, (Word_t) ctx);
}
// ----------------------------------------------------------------------------
// metric handle
-void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle) {
- METRIC *metric = (METRIC *)db_metric_handle;
+void rrdeng_metric_release(STORAGE_METRIC_HANDLE *smh) {
+ METRIC *metric = (METRIC *)smh;
mrg_metric_release(main_mrg, metric);
}
-STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle) {
- METRIC *metric = (METRIC *)db_metric_handle;
+STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *smh) {
+ METRIC *metric = (METRIC *)smh;
return (STORAGE_METRIC_HANDLE *) mrg_metric_dup(main_mrg, metric);
}
-STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid) {
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
+STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *si, nd_uuid_t *uuid) {
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
return (STORAGE_METRIC_HANDLE *) mrg_metric_get_and_acquire(main_mrg, uuid, (Word_t) ctx);
}
-static METRIC *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid) {
- internal_fatal(!db_instance, "DBENGINE: db_instance is NULL");
+static METRIC *rrdeng_metric_create(STORAGE_INSTANCE *si, nd_uuid_t *uuid) {
+ internal_fatal(!si, "DBENGINE: STORAGE_INSTANCE is NULL");
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
MRG_ENTRY entry = {
.uuid = uuid,
.section = (Word_t)ctx,
@@ -145,32 +157,32 @@ static METRIC *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid)
.latest_update_every_s = 0,
};
- METRIC *metric = mrg_metric_add_and_acquire(main_mrg, entry, NULL);
+ bool added;
+ METRIC *metric = mrg_metric_add_and_acquire(main_mrg, entry, &added);
+ if (added)
+ __atomic_add_fetch(&ctx->atomic.metrics, 1, __ATOMIC_RELAXED);
return metric;
}
-STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance) {
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
+STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *si) {
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
METRIC *metric;
metric = mrg_metric_get_and_acquire(main_mrg, &rd->metric_uuid, (Word_t) ctx);
if(unlikely(!metric)) {
- if(unlikely(ctx->config.legacy)) {
- // this is a single host database
- // generate uuid from the chart and dimensions ids
- // and overwrite the one supplied by rrddim
- metric = rrdeng_metric_get_legacy(db_instance, rrddim_id(rd), rrdset_id(rd->rrdset));
+ if(unlikely(unittest_running)) {
+ metric = rrdeng_metric_unittest(si, rrddim_id(rd), rrdset_id(rd->rrdset));
if (metric)
uuid_copy(rd->metric_uuid, *mrg_metric_uuid(main_mrg, metric));
}
if(likely(!metric))
- metric = rrdeng_metric_create(db_instance, &rd->metric_uuid);
+ metric = rrdeng_metric_create(si, &rd->metric_uuid);
}
#ifdef NETDATA_INTERNAL_CHECKS
- if(uuid_memcmp(&rd->metric_uuid, mrg_metric_uuid(main_mrg, metric)) != 0) {
+ if(!uuid_eq(rd->metric_uuid, *mrg_metric_uuid(main_mrg, metric))) {
char uuid1[UUID_STR_LEN + 1];
char uuid2[UUID_STR_LEN + 1];
@@ -192,14 +204,14 @@ STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE
// collect ops
static inline void check_and_fix_mrg_update_every(struct rrdeng_collect_handle *handle) {
- if(unlikely((time_t)(handle->update_every_ut / USEC_PER_SEC) != mrg_metric_get_update_every_s(main_mrg, handle->metric))) {
- internal_error(true, "DBENGINE: collection handle has update every %ld, but the metric registry has %ld. Fixing it.",
- (time_t)(handle->update_every_ut / USEC_PER_SEC), mrg_metric_get_update_every_s(main_mrg, handle->metric));
+ if(unlikely((uint32_t)(handle->update_every_ut / USEC_PER_SEC) != mrg_metric_get_update_every_s(main_mrg, handle->metric))) {
+ internal_error(true, "DBENGINE: collection handle has update every %u, but the metric registry has %u. Fixing it.",
+ (uint32_t)(handle->update_every_ut / USEC_PER_SEC), mrg_metric_get_update_every_s(main_mrg, handle->metric));
if(unlikely(!handle->update_every_ut))
handle->update_every_ut = (usec_t)mrg_metric_get_update_every_s(main_mrg, handle->metric) * USEC_PER_SEC;
else
- mrg_metric_set_update_every(main_mrg, handle->metric, (time_t)(handle->update_every_ut / USEC_PER_SEC));
+ mrg_metric_set_update_every(main_mrg, handle->metric, (uint32_t)(handle->update_every_ut / USEC_PER_SEC));
}
}
@@ -210,10 +222,10 @@ static inline bool check_completed_page_consistency(struct rrdeng_collect_handle
struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);
- uuid_t *uuid = mrg_metric_uuid(main_mrg, handle->metric);
+ nd_uuid_t *uuid = mrg_metric_uuid(main_mrg, handle->metric);
time_t start_time_s = pgc_page_start_time_s(handle->pgc_page);
time_t end_time_s = pgc_page_end_time_s(handle->pgc_page);
- time_t update_every_s = pgc_page_update_every_s(handle->pgc_page);
+ uint32_t update_every_s = pgc_page_update_every_s(handle->pgc_page);
size_t page_length = handle->page_position * CTX_POINT_SIZE_BYTES(ctx);
size_t entries = handle->page_position;
time_t overwrite_zero_update_every_s = (time_t)(handle->update_every_ut / USEC_PER_SEC);
@@ -245,8 +257,8 @@ static inline bool check_completed_page_consistency(struct rrdeng_collect_handle
* Gets a handle for storing metrics to the database.
* The handle must be released with rrdeng_store_metric_final().
*/
-STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg) {
- METRIC *metric = (METRIC *)db_metric_handle;
+STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *smh, uint32_t update_every, STORAGE_METRICS_GROUP *smg) {
+ METRIC *metric = (METRIC *)smh;
struct rrdengine_instance *ctx = mrg_metric_ctx(metric);
bool is_1st_metric_writer = true;
@@ -262,7 +274,7 @@ STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metri
struct rrdeng_collect_handle *handle;
handle = callocz(1, sizeof(struct rrdeng_collect_handle));
- handle->common.backend = STORAGE_ENGINE_BACKEND_DBENGINE;
+ handle->common.seb = STORAGE_ENGINE_BACKEND_DBENGINE;
handle->metric = metric;
handle->pgc_page = NULL;
@@ -288,15 +300,15 @@ STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metri
// data collection may be able to go back in time and during the addition of new pages
// clean pages may be found matching ours!
- time_t db_first_time_s, db_last_time_s, db_update_every_s;
- mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);
+ time_t db_first_time_s, db_last_time_s;
+ mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, NULL);
handle->page_end_time_ut = (usec_t)db_last_time_s * USEC_PER_SEC;
return (STORAGE_COLLECT_HANDLE *)handle;
}
-void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle) {
- struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
+void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *sch) {
+ struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch;
if (unlikely(!handle->pgc_page))
return;
@@ -307,7 +319,17 @@ void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_h
else {
check_completed_page_consistency(handle);
mrg_metric_set_clean_latest_time_s(main_mrg, handle->metric, pgc_page_end_time_s(handle->pgc_page));
- pgc_page_hot_to_dirty_and_release(main_cache, handle->pgc_page);
+
+ struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);
+ time_t start_time_s = pgc_page_start_time_s(handle->pgc_page);
+ time_t end_time_s = pgc_page_end_time_s(handle->pgc_page);
+ uint32_t update_every_s = mrg_metric_get_update_every_s(main_mrg, handle->metric);
+ if (end_time_s && start_time_s && end_time_s > start_time_s && update_every_s) {
+ uint64_t add_samples = (end_time_s - start_time_s) / update_every_s;
+ __atomic_add_fetch(&ctx->atomic.samples, add_samples, __ATOMIC_RELAXED);
+ }
+
+ pgc_page_hot_to_dirty_and_release(main_cache, handle->pgc_page, false);
}
mrg_metric_set_hot_latest_time_s(main_mrg, handle->metric, 0);
@@ -336,7 +358,7 @@ static void rrdeng_store_metric_create_new_page(struct rrdeng_collect_handle *ha
PGD *data,
size_t data_size) {
time_t point_in_time_s = (time_t)(point_in_time_ut / USEC_PER_SEC);
- const time_t update_every_s = (time_t)(handle->update_every_ut / USEC_PER_SEC);
+ const uint32_t update_every_s = (uint32_t)(handle->update_every_ut / USEC_PER_SEC);
PGC_ENTRY page_entry = {
.section = (Word_t) ctx,
@@ -345,7 +367,7 @@ static void rrdeng_store_metric_create_new_page(struct rrdeng_collect_handle *ha
.end_time_s = point_in_time_s,
.size = data_size,
.data = data,
- .update_every_s = (uint32_t) update_every_s,
+ .update_every_s = update_every_s,
.hot = true
};
@@ -364,11 +386,11 @@ static void rrdeng_store_metric_create_new_page(struct rrdeng_collect_handle *ha
nd_log_limit_static_global_var(erl, 1, 0);
nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING,
#endif
- "DBENGINE: metric '%s' new page from %ld to %ld, update every %ld, has a conflict in main cache "
- "with existing %s%s page from %ld to %ld, update every %ld - "
+ "DBENGINE: metric '%s' new page from %ld to %ld, update every %u, has a conflict in main cache "
+ "with existing %s%s page from %ld to %ld, update every %u - "
"is it collected more than once?",
uuid,
- page_entry.start_time_s, page_entry.end_time_s, (time_t)page_entry.update_every_s,
+ page_entry.start_time_s, page_entry.end_time_s, page_entry.update_every_s,
pgc_is_page_hot(pgc_page) ? "hot" : "not-hot",
pgc_page_data(pgc_page) == PGD_EMPTY ? " gap" : "",
pgc_page_start_time_s(pgc_page), pgc_page_end_time_s(pgc_page), pgc_page_update_every_s(pgc_page)
@@ -444,14 +466,14 @@ static PGD *rrdeng_alloc_new_page_data(struct rrdeng_collect_handle *handle, siz
*data_size = size;
switch (ctx->config.page_type) {
- case PAGE_METRICS:
- case PAGE_TIER:
+ case RRDENG_PAGE_TYPE_ARRAY_32BIT:
+ case RRDENG_PAGE_TYPE_ARRAY_TIER1:
d = pgd_create(ctx->config.page_type, slots);
break;
- case PAGE_GORILLA_METRICS:
+ case RRDENG_PAGE_TYPE_GORILLA_32BIT:
// ignore slots, and use the fixed number of slots per gorilla buffer.
// gorilla will automatically add more buffers if needed.
- d = pgd_create(ctx->config.page_type, GORILLA_BUFFER_SLOTS);
+ d = pgd_create(ctx->config.page_type, RRDENG_GORILLA_32BIT_BUFFER_SLOTS);
break;
default:
fatal("Unknown page type: %uc\n", ctx->config.page_type);
@@ -461,7 +483,7 @@ static PGD *rrdeng_alloc_new_page_data(struct rrdeng_collect_handle *handle, siz
return d;
}
-static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *collection_handle,
+static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *sch,
const usec_t point_in_time_ut,
const NETDATA_DOUBLE n,
const NETDATA_DOUBLE min_value,
@@ -470,7 +492,7 @@ static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *collection_
const uint16_t anomaly_count,
const SN_FLAGS flags)
{
- struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
+ struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch;
struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);
if(unlikely(!handle->page_data))
@@ -497,7 +519,7 @@ static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *collection_
if(unlikely(++handle->page_position >= handle->page_entries_max)) {
internal_fatal(handle->page_position > handle->page_entries_max, "DBENGINE: exceeded page max number of points");
handle->page_flags |= RRDENG_PAGE_FULL;
- rrdeng_store_metric_flush_current_page(collection_handle);
+ rrdeng_store_metric_flush_current_page(sch);
}
}
@@ -543,7 +565,7 @@ static void store_metric_next_error_log(struct rrdeng_collect_handle *handle __m
#endif
}
-void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle,
+void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *sch,
const usec_t point_in_time_ut,
const NETDATA_DOUBLE n,
const NETDATA_DOUBLE min_value,
@@ -554,7 +576,7 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle,
{
timing_step(TIMING_STEP_RRDSET_STORE_METRIC);
- struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
+ struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch;
#ifdef NETDATA_INTERNAL_CHECKS
if(unlikely(point_in_time_ut > (usec_t)max_acceptable_collected_time() * USEC_PER_SEC))
@@ -571,11 +593,11 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle,
if(handle->pgc_page) {
if (unlikely(delta_ut < handle->update_every_ut)) {
handle->page_flags |= RRDENG_PAGE_STEP_TOO_SMALL;
- rrdeng_store_metric_flush_current_page(collection_handle);
+ rrdeng_store_metric_flush_current_page(sch);
}
else if (unlikely(delta_ut % handle->update_every_ut)) {
handle->page_flags |= RRDENG_PAGE_STEP_UNALIGNED;
- rrdeng_store_metric_flush_current_page(collection_handle);
+ rrdeng_store_metric_flush_current_page(sch);
}
else {
size_t points_gap = delta_ut / handle->update_every_ut;
@@ -583,7 +605,7 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle,
if (points_gap >= page_remaining_points) {
handle->page_flags |= RRDENG_PAGE_BIG_GAP;
- rrdeng_store_metric_flush_current_page(collection_handle);
+ rrdeng_store_metric_flush_current_page(sch);
}
else {
// loop to fill the gap
@@ -594,7 +616,7 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle,
this_ut <= stop_ut;
this_ut = handle->page_end_time_ut + handle->update_every_ut) {
rrdeng_store_metric_append_point(
- collection_handle,
+ sch,
this_ut,
NAN, NAN, NAN,
1, 0,
@@ -618,7 +640,7 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle,
timing_step(TIMING_STEP_DBENGINE_FIRST_CHECK);
- rrdeng_store_metric_append_point(collection_handle,
+ rrdeng_store_metric_append_point(sch,
point_in_time_ut,
n, min_value, max_value,
count, anomaly_count,
@@ -629,12 +651,12 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle,
* Releases the database reference from the handle for storing metrics.
* Returns 1 if it's safe to delete the dimension.
*/
-int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
- struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
+int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *sch) {
+ struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch;
struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);
handle->page_flags |= RRDENG_PAGE_COLLECT_FINALIZE;
- rrdeng_store_metric_flush_current_page(collection_handle);
+ rrdeng_store_metric_flush_current_page(sch);
rrdeng_page_alignment_release(handle->alignment);
__atomic_sub_fetch(&ctx->atomic.collectors_running, 1, __ATOMIC_RELAXED);
@@ -644,8 +666,8 @@ int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
if((handle->options & RRDENG_1ST_METRIC_WRITER) && !mrg_metric_clear_writer(main_mrg, handle->metric))
internal_fatal(true, "DBENGINE: metric is already released");
- time_t first_time_s, last_time_s, update_every_s;
- mrg_metric_get_retention(main_mrg, handle->metric, &first_time_s, &last_time_s, &update_every_s);
+ time_t first_time_s, last_time_s;
+ mrg_metric_get_retention(main_mrg, handle->metric, &first_time_s, &last_time_s, NULL);
mrg_metric_release(main_mrg, handle->metric);
freez(handle);
@@ -656,8 +678,8 @@ int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
return 0;
}
-void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every) {
- struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
+void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *sch, int update_every) {
+ struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch;
check_and_fix_mrg_update_every(handle);
METRIC *metric = handle->metric;
@@ -667,7 +689,7 @@ void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *col
return;
handle->page_flags |= RRDENG_PAGE_UPDATE_EVERY_CHANGE;
- rrdeng_store_metric_flush_current_page(collection_handle);
+ rrdeng_store_metric_flush_current_page(sch);
mrg_metric_set_update_every(main_mrg, metric, update_every);
handle->update_every_ut = update_every_ut;
}
@@ -679,7 +701,7 @@ void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *col
SPINLOCK global_query_handle_spinlock = NETDATA_SPINLOCK_INITIALIZER;
static struct rrdeng_query_handle *global_query_handle_ll = NULL;
static void register_query_handle(struct rrdeng_query_handle *handle) {
- handle->query_pid = gettid();
+ handle->query_pid = gettid_cached();
handle->started_time_s = now_realtime_sec();
spinlock_lock(&global_query_handle_spinlock);
@@ -704,17 +726,15 @@ static void unregister_query_handle(struct rrdeng_query_handle *handle __maybe_u
* Gets a handle for loading metrics from the database.
* The handle must be released with rrdeng_load_metric_final().
*/
-void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle,
- struct storage_engine_query_handle *rrddim_handle,
+void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *smh,
+ struct storage_engine_query_handle *seqh,
time_t start_time_s,
time_t end_time_s,
STORAGE_PRIORITY priority)
{
usec_t started_ut = now_monotonic_usec();
- netdata_thread_disable_cancelability();
-
- METRIC *metric = (METRIC *)db_metric_handle;
+ METRIC *metric = (METRIC *)smh;
struct rrdengine_instance *ctx = mrg_metric_ctx(metric);
struct rrdeng_query_handle *handle;
@@ -736,7 +756,8 @@ void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle,
// is inserted into the main cache, to avoid scanning the journals
// again for pages matching the gap.
- time_t db_first_time_s, db_last_time_s, db_update_every_s;
+ time_t db_first_time_s, db_last_time_s;
+ uint32_t db_update_every_s;
mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);
if(is_page_in_time_range(start_time_s, end_time_s, db_first_time_s, db_last_time_s) == PAGE_IS_IN_RANGE) {
@@ -750,11 +771,11 @@ void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle,
mrg_metric_set_update_every_s_if_zero(main_mrg, metric, default_rrd_update_every);
}
- rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle;
- rrddim_handle->start_time_s = handle->start_time_s;
- rrddim_handle->end_time_s = handle->end_time_s;
- rrddim_handle->priority = priority;
- rrddim_handle->backend = STORAGE_ENGINE_BACKEND_DBENGINE;
+ seqh->handle = (STORAGE_QUERY_HANDLE *) handle;
+ seqh->start_time_s = handle->start_time_s;
+ seqh->end_time_s = handle->end_time_s;
+ seqh->priority = priority;
+ seqh->seb = STORAGE_ENGINE_BACKEND_DBENGINE;
pg_cache_preload(handle);
@@ -766,16 +787,16 @@ void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle,
handle->now_s = start_time_s;
handle->dt_s = db_update_every_s;
- rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle;
- rrddim_handle->start_time_s = handle->start_time_s;
- rrddim_handle->end_time_s = 0;
- rrddim_handle->priority = priority;
- rrddim_handle->backend = STORAGE_ENGINE_BACKEND_DBENGINE;
+ seqh->handle = (STORAGE_QUERY_HANDLE *) handle;
+ seqh->start_time_s = handle->start_time_s;
+ seqh->end_time_s = 0;
+ seqh->priority = priority;
+ seqh->seb = STORAGE_ENGINE_BACKEND_DBENGINE;
}
}
-static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_handle, bool debug_this __maybe_unused) {
- struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
+static bool rrdeng_load_page_next(struct storage_engine_query_handle *seqh, bool debug_this __maybe_unused) {
+ struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)seqh->handle;
struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric);
if (likely(handle->page)) {
@@ -785,7 +806,7 @@ static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_han
pgdc_reset(&handle->pgdc, NULL, UINT32_MAX);
}
- if (unlikely(handle->now_s > rrddim_handle->end_time_s))
+ if (unlikely(handle->now_s > seqh->end_time_s))
return false;
size_t entries = 0;
@@ -799,7 +820,7 @@ static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_han
time_t page_start_time_s = pgc_page_start_time_s(handle->page);
time_t page_end_time_s = pgc_page_end_time_s(handle->page);
- time_t page_update_every_s = pgc_page_update_every_s(handle->page);
+ uint32_t page_update_every_s = pgc_page_update_every_s(handle->page);
unsigned position;
if(likely(handle->now_s >= page_start_time_s && handle->now_s <= page_end_time_s)) {
@@ -810,13 +831,13 @@ static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_han
}
else {
position = (handle->now_s - page_start_time_s) * (entries - 1) / (page_end_time_s - page_start_time_s);
- time_t point_end_time_s = page_start_time_s + position * page_update_every_s;
+ time_t point_end_time_s = page_start_time_s + position * (time_t) page_update_every_s;
while(point_end_time_s < handle->now_s && position + 1 < entries) {
// https://github.com/netdata/netdata/issues/14411
// we really need a while() here, because the delta may be
// 2 points at higher tiers
position++;
- point_end_time_s = page_start_time_s + position * page_update_every_s;
+ point_end_time_s = page_start_time_s + position * (time_t) page_update_every_s;
}
handle->now_s = point_end_time_s;
}
@@ -845,11 +866,11 @@ static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_han
// Returns the metric and sets its timestamp into current_time
// IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags)
// IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES
-STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle) {
- struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
+STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *seqh) {
+ struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)seqh->handle;
STORAGE_POINT sp;
- if (unlikely(handle->now_s > rrddim_handle->end_time_s)) {
+ if (unlikely(handle->now_s > seqh->end_time_s)) {
storage_point_empty(sp, handle->now_s - handle->dt_s, handle->now_s);
goto prepare_for_next_iteration;
}
@@ -857,8 +878,8 @@ STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim
if (unlikely(!handle->page || handle->position >= handle->entries)) {
// We need to get a new page
- if (!rrdeng_load_page_next(rrddim_handle, false)) {
- handle->now_s = rrddim_handle->end_time_s;
+ if (!rrdeng_load_page_next(seqh, false)) {
+ handle->now_s = seqh->end_time_s;
storage_point_empty(sp, handle->now_s - handle->dt_s, handle->now_s);
goto prepare_for_next_iteration;
}
@@ -870,7 +891,7 @@ STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim
pgdc_get_next_point(&handle->pgdc, handle->position, &sp);
prepare_for_next_iteration:
- internal_fatal(sp.end_time_s < rrddim_handle->start_time_s, "DBENGINE: this point is too old for this query");
+ internal_fatal(sp.end_time_s < seqh->start_time_s, "DBENGINE: this point is too old for this query");
internal_fatal(sp.end_time_s < handle->now_s, "DBENGINE: this point is too old for this point in time");
handle->now_s += handle->dt_s;
@@ -879,17 +900,17 @@ prepare_for_next_iteration:
return sp;
}
-int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrddim_handle) {
- struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
- return (handle->now_s > rrddim_handle->end_time_s);
+int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *seqh) {
+ struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)seqh->handle;
+ return (handle->now_s > seqh->end_time_s);
}
/*
* Releases the database reference from the handle for loading metrics.
*/
-void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrddim_handle)
+void rrdeng_load_metric_finalize(struct storage_engine_query_handle *seqh)
{
- struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
+ struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)seqh->handle;
if (handle->page) {
pgc_page_release(main_cache, handle->page);
@@ -901,24 +922,23 @@ void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrddim_hand
unregister_query_handle(handle);
rrdeng_query_handle_release(handle);
- rrddim_handle->handle = NULL;
- netdata_thread_enable_cancelability();
+ seqh->handle = NULL;
}
-time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *rrddim_handle) {
- struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle;
+time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *seqh) {
+ struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)seqh->handle;
if(handle->pdc) {
rrdeng_prep_wait(handle->pdc);
- if (handle->pdc->optimal_end_time_s > rrddim_handle->end_time_s)
- rrddim_handle->end_time_s = handle->pdc->optimal_end_time_s;
+ if (handle->pdc->optimal_end_time_s > seqh->end_time_s)
+ seqh->end_time_s = handle->pdc->optimal_end_time_s;
}
- return rrddim_handle->end_time_s;
+ return seqh->end_time_s;
}
-time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
- METRIC *metric = (METRIC *)db_metric_handle;
+time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *smh) {
+ METRIC *metric = (METRIC *)smh;
time_t latest_time_s = 0;
if (metric)
@@ -927,8 +947,8 @@ time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
return latest_time_s;
}
-time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
- METRIC *metric = (METRIC *)db_metric_handle;
+time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *smh) {
+ METRIC *metric = (METRIC *)smh;
time_t oldest_time_s = 0;
if (metric)
@@ -937,9 +957,9 @@ time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
return oldest_time_s;
}
-bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *db_instance, uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s)
+bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, nd_uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s)
{
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
if (unlikely(!ctx)) {
netdata_log_error("DBENGINE: invalid STORAGE INSTANCE to %s()", __FUNCTION__);
return false;
@@ -949,26 +969,35 @@ bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *db_instance, uuid_t *dim_
if (unlikely(!metric))
return false;
- time_t update_every_s;
- mrg_metric_get_retention(main_mrg, metric, first_entry_s, last_entry_s, &update_every_s);
+ mrg_metric_get_retention(main_mrg, metric, first_entry_s, last_entry_s, NULL);
mrg_metric_release(main_mrg, metric);
return true;
}
-uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *db_instance) {
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
+uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *si) {
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
return ctx->config.max_disk_space;
}
-uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *db_instance) {
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
+uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *si) {
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
return __atomic_load_n(&ctx->atomic.current_disk_space, __ATOMIC_RELAXED);
}
-time_t rrdeng_global_first_time_s(STORAGE_INSTANCE *db_instance) {
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
+uint64_t rrdeng_metrics(STORAGE_INSTANCE *si) {
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
+ return __atomic_load_n(&ctx->atomic.metrics, __ATOMIC_RELAXED);
+}
+
+uint64_t rrdeng_samples(STORAGE_INSTANCE *si) {
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
+ return __atomic_load_n(&ctx->atomic.samples, __ATOMIC_RELAXED);
+}
+
+time_t rrdeng_global_first_time_s(STORAGE_INSTANCE *si) {
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
time_t t = __atomic_load_n(&ctx->atomic.first_time_s, __ATOMIC_RELAXED);
if(t == LONG_MAX || t < 0)
@@ -977,8 +1006,8 @@ time_t rrdeng_global_first_time_s(STORAGE_INSTANCE *db_instance) {
return t;
}
-size_t rrdeng_currently_collected_metrics(STORAGE_INSTANCE *db_instance) {
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
+size_t rrdeng_currently_collected_metrics(STORAGE_INSTANCE *si) {
+ struct rrdengine_instance *ctx = (struct rrdengine_instance *)si;
return __atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED);
}
@@ -1099,19 +1128,19 @@ void rrdeng_readiness_wait(struct rrdengine_instance *ctx) {
netdata_log_info("DBENGINE: tier %d is ready for data collection and queries", ctx->config.tier);
}
-bool rrdeng_is_legacy(STORAGE_INSTANCE *db_instance) {
- struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
- return ctx->config.legacy;
-}
-
void rrdeng_exit_mode(struct rrdengine_instance *ctx) {
__atomic_store_n(&ctx->quiesce.exit_mode, true, __ATOMIC_RELAXED);
}
/*
* Returns 0 on success, negative on error
*/
-int rrdeng_init(struct rrdengine_instance **ctxp, const char *dbfiles_path,
- unsigned disk_space_mb, size_t tier) {
+int rrdeng_init(
+ struct rrdengine_instance **ctxp,
+ const char *dbfiles_path,
+ unsigned disk_space_mb,
+ size_t tier,
+ time_t max_retention_s)
+{
struct rrdengine_instance *ctx;
uint32_t max_open_files;
@@ -1130,30 +1159,33 @@ int rrdeng_init(struct rrdengine_instance **ctxp, const char *dbfiles_path,
return UV_EMFILE;
}
- if(NULL == ctxp) {
- ctx = multidb_ctx[tier];
- memset(ctx, 0, sizeof(*ctx));
- ctx->config.legacy = false;
- }
- else {
- *ctxp = ctx = callocz(1, sizeof(*ctx));
- ctx->config.legacy = true;
+ if(ctxp) {
+ *ctxp = ctx = mallocz(sizeof(*ctx));
+ initialize_single_ctx(ctx);
}
+ else
+ ctx = multidb_ctx[tier];
ctx->config.tier = (int)tier;
ctx->config.page_type = tier_page_type[tier];
- ctx->config.global_compress_alg = RRD_LZ4;
- if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB)
- disk_space_mb = RRDENG_MIN_DISK_SPACE_MB;
- ctx->config.max_disk_space = disk_space_mb * 1048576LLU;
+ ctx->config.global_compress_alg = dbengine_default_compression();
+
strncpyz(ctx->config.dbfiles_path, dbfiles_path, sizeof(ctx->config.dbfiles_path) - 1);
ctx->config.dbfiles_path[sizeof(ctx->config.dbfiles_path) - 1] = '\0';
+ if (disk_space_mb && disk_space_mb < RRDENG_MIN_DISK_SPACE_MB)
+ disk_space_mb = RRDENG_MIN_DISK_SPACE_MB;
+
+ ctx->config.max_disk_space = disk_space_mb * 1048576LLU;
+
+ ctx->config.max_retention_s = max_retention_s;
+
ctx->atomic.transaction_id = 1;
ctx->quiesce.enabled = false;
- rw_spinlock_init(&ctx->njfv2idx.spinlock);
ctx->atomic.first_time_s = LONG_MAX;
+ ctx->atomic.metrics = 0;
+ ctx->atomic.samples = 0;
if (rrdeng_dbengine_spawn(ctx) && !init_rrd_files(ctx)) {
// success - we run this ctx too
@@ -1161,7 +1193,7 @@ int rrdeng_init(struct rrdengine_instance **ctxp, const char *dbfiles_path,
return 0;
}
- if (ctx->config.legacy) {
+ if (unittest_running) {
freez(ctx);
if (ctxp)
*ctxp = NULL;
@@ -1192,17 +1224,17 @@ int rrdeng_exit(struct rrdengine_instance *ctx) {
size_t count = 10;
while(__atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED) && count && !unittest_running) {
if(!logged) {
- netdata_log_info("DBENGINE: waiting for collectors to finish on tier %d...", (ctx->config.legacy) ? -1 : ctx->config.tier);
+ netdata_log_info("DBENGINE: waiting for collectors to finish on tier %d...", ctx->config.tier);
logged = true;
}
sleep_usec(100 * USEC_PER_MS);
count--;
}
- netdata_log_info("DBENGINE: flushing main cache for tier %d", (ctx->config.legacy) ? -1 : ctx->config.tier);
+ netdata_log_info("DBENGINE: flushing main cache for tier %d", ctx->config.tier);
pgc_flush_all_hot_and_dirty_pages(main_cache, (Word_t)ctx);
- netdata_log_info("DBENGINE: shutting down tier %d", (ctx->config.legacy) ? -1 : ctx->config.tier);
+ netdata_log_info("DBENGINE: shutting down tier %d", ctx->config.tier);
struct completion completion = {};
completion_init(&completion);
rrdeng_enq_cmd(ctx, RRDENG_OPCODE_CTX_SHUTDOWN, NULL, &completion, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL);
@@ -1211,7 +1243,7 @@ int rrdeng_exit(struct rrdengine_instance *ctx) {
finalize_rrd_files(ctx);
- if(ctx->config.legacy)
+ if (unittest_running) //(ctx->config.unittest)
freez(ctx);
rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
diff --git a/database/engine/rrdengineapi.h b/src/database/engine/rrdengineapi.h
index 7ae0e7079..cf9606255 100644
--- a/database/engine/rrdengineapi.h
+++ b/src/database/engine/rrdengineapi.h
@@ -6,7 +6,8 @@
#include "rrdengine.h"
#define RRDENG_MIN_PAGE_CACHE_SIZE_MB (8)
-#define RRDENG_MIN_DISK_SPACE_MB (64)
+#define RRDENG_MIN_DISK_SPACE_MB (256)
+#define RRDENG_DEFAULT_TIER_DISK_SPACE_MB (1024)
#define RRDENG_NR_STATS (38)
@@ -17,57 +18,64 @@ extern int default_rrdeng_extent_cache_mb;
extern int db_engine_journal_check;
extern int default_rrdeng_disk_quota_mb;
extern int default_multidb_disk_quota_mb;
+extern bool new_dbengine_defaults;
+extern bool legacy_multihost_db_space;
+extern RRD_BACKFILL default_backfill;
+
extern struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS];
extern size_t page_type_size[];
extern size_t tier_page_size[];
+extern size_t tier_quota_mb[];
extern uint8_t tier_page_type[];
#define CTX_POINT_SIZE_BYTES(ctx) page_type_size[(ctx)->config.page_type]
-void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_t *ret_uuid);
-
-STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance);
-STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid);
-void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle);
-STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle);
+STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *si);
+STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *si, nd_uuid_t *uuid);
+void rrdeng_metric_release(STORAGE_METRIC_HANDLE *smh);
+STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *smh);
-STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg);
-void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle);
-void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every);
-void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time_ut, NETDATA_DOUBLE n,
+STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *smh, uint32_t update_every, STORAGE_METRICS_GROUP *smg);
+void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *sch);
+void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *sch, int update_every);
+void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *sch, usec_t point_in_time_ut, NETDATA_DOUBLE n,
NETDATA_DOUBLE min_value,
NETDATA_DOUBLE max_value,
uint16_t count,
uint16_t anomaly_count,
SN_FLAGS flags);
-int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle);
+int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *sch);
-void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *rrddim_handle,
+void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *smh, struct storage_engine_query_handle *seqh,
time_t start_time_s, time_t end_time_s, STORAGE_PRIORITY priority);
-STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle);
+STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *seqh);
-int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrddim_handle);
-void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrddim_handle);
-time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle);
-time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle);
-time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *rrddim_handle);
+int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *seqh);
+void rrdeng_load_metric_finalize(struct storage_engine_query_handle *seqh);
+time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *smh);
+time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *smh);
+time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *seqh);
void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array);
/* must call once before using anything */
-int rrdeng_init(struct rrdengine_instance **ctxp, const char *dbfiles_path,
- unsigned disk_space_mb, size_t tier);
+int rrdeng_init(
+ struct rrdengine_instance **ctxp,
+ const char *dbfiles_path,
+ unsigned disk_space_mb,
+ size_t tier,
+ time_t max_retention_s);
void rrdeng_readiness_wait(struct rrdengine_instance *ctx);
void rrdeng_exit_mode(struct rrdengine_instance *ctx);
int rrdeng_exit(struct rrdengine_instance *ctx);
void rrdeng_prepare_exit(struct rrdengine_instance *ctx);
-bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *db_instance, uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s);
+bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, nd_uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s);
-extern STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid);
-extern void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg);
+extern STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *si, nd_uuid_t *uuid);
+extern void rrdeng_metrics_group_release(STORAGE_INSTANCE *si, STORAGE_METRICS_GROUP *smg);
typedef struct rrdengine_size_statistics {
size_t default_granularity_secs;
@@ -221,9 +229,5 @@ struct rrdeng_cache_efficiency_stats rrdeng_get_cache_efficiency_stats(void);
RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx);
size_t rrdeng_collectors_running(struct rrdengine_instance *ctx);
-bool rrdeng_is_legacy(STORAGE_INSTANCE *db_instance);
-
-uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *db_instance);
-uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *db_instance);
#endif /* NETDATA_RRDENGINEAPI_H */
diff --git a/database/engine/rrdenginelib.h b/src/database/engine/rrdenginelib.h
index a0febd4f4..e5ceed286 100644
--- a/database/engine/rrdenginelib.h
+++ b/src/database/engine/rrdenginelib.h
@@ -88,7 +88,5 @@ static inline int open_file_buffered_io(char *path, int flags, uv_file *file)
{
return open_file_for_io(path, flags, file, 0);
}
-int compute_multidb_diskspace();
-int is_legacy_child(const char *machine_guid);
#endif /* NETDATA_RRDENGINELIB_H */
diff --git a/database/engine/metadata_log/README.md b/src/go/collectors/go.d.plugin/agent/testdata/agent-empty.conf
index e69de29bb..e69de29bb 100644
--- a/database/engine/metadata_log/README.md
+++ b/src/go/collectors/go.d.plugin/agent/testdata/agent-empty.conf