From 3c315f0fff93aa072472abc10815963ac0035268 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 12 Aug 2022 09:26:11 +0200 Subject: Adding upstream version 1.36.0. Signed-off-by: Daniel Baumann --- database/engine/pagecache.c | 207 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 185 insertions(+), 22 deletions(-) (limited to 'database/engine/pagecache.c') diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c index cddbf9e1f..39f7642d0 100644 --- a/database/engine/pagecache.c +++ b/database/engine/pagecache.c @@ -3,6 +3,50 @@ #include "rrdengine.h" +ARAL page_descr_aral = { + .element_size = sizeof(struct rrdeng_page_descr), + .elements = 20000, + .filename = "page_descriptors", + .cache_dir = &netdata_configured_cache_dir, + .use_mmap = false, + .internal.initialized = false +}; + +void rrdeng_page_descr_aral_go_singlethreaded(void) { + page_descr_aral.internal.lockless = true; +} +void rrdeng_page_descr_aral_go_multithreaded(void) { + page_descr_aral.internal.lockless = false; +} + +struct rrdeng_page_descr *rrdeng_page_descr_mallocz(void) { + struct rrdeng_page_descr *descr; + descr = arrayalloc_mallocz(&page_descr_aral); + return descr; +} + +void rrdeng_page_descr_freez(struct rrdeng_page_descr *descr) { + arrayalloc_freez(&page_descr_aral, descr); +} + +void rrdeng_page_descr_use_malloc(void) { + if(page_descr_aral.internal.initialized) + error("DBENGINE: cannot change ARAL allocation policy after it has been initialized."); + else + page_descr_aral.use_mmap = false; +} + +void rrdeng_page_descr_use_mmap(void) { + if(page_descr_aral.internal.initialized) + error("DBENGINE: cannot change ARAL allocation policy after it has been initialized."); + else + page_descr_aral.use_mmap = true; +} + +bool rrdeng_page_descr_is_mmap(void) { + return page_descr_aral.use_mmap; +} + /* Forward declarations */ static int pg_cache_try_evict_one_page_unsafe(struct rrdengine_instance *ctx); @@ -81,7 +125,7 @@ struct rrdeng_page_descr *pg_cache_create_descr(void) { struct rrdeng_page_descr *descr; - descr = mallocz(sizeof(*descr)); + descr = rrdeng_page_descr_mallocz(); descr->page_length = 0; descr->start_time = INVALID_TIME; descr->end_time = INVALID_TIME; @@ -238,8 +282,7 @@ static void pg_cache_release_pages(struct rrdengine_instance *ctx, unsigned numb */ unsigned long pg_cache_hard_limit(struct rrdengine_instance *ctx) { - /* it's twice the number of producers since we pin 2 pages per producer */ - return ctx->max_cache_pages + 2 * (unsigned long)ctx->metric_API_max_producers; + return ctx->max_cache_pages + (unsigned long)ctx->metric_API_max_producers; } /* @@ -248,8 +291,7 @@ unsigned long pg_cache_hard_limit(struct rrdengine_instance *ctx) */ unsigned long pg_cache_soft_limit(struct rrdengine_instance *ctx) { - /* it's twice the number of producers since we pin 2 pages per producer */ - return ctx->cache_pages_low_watermark + 2 * (unsigned long)ctx->metric_API_max_producers; + return ctx->cache_pages_low_watermark + (unsigned long)ctx->metric_API_max_producers; } /* @@ -496,7 +538,7 @@ uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_d (void)sleep_usec(1000); /* 1 msec */ } destroy: - freez(descr); + rrdeng_page_descr_freez(descr); pg_cache_update_metric_times(page_index); return can_delete_metric; @@ -1069,9 +1111,9 @@ pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index int retry_count = 0; while (1) { descr = find_first_page_in_time_range(page_index, start_time, end_time); - if (NULL == descr || 0 == descr->page_length || retry_count == MAX_PAGE_CACHE_RETRY_WAIT) { + if (NULL == descr || 0 == descr->page_length || retry_count == default_rrdeng_page_fetch_retries) { /* non-empty page not found */ - if (retry_count == MAX_PAGE_CACHE_RETRY_WAIT) + if (retry_count == default_rrdeng_page_fetch_retries) error_report("Page cache timeout while waiting for page %p : returning FAIL", descr); uv_rwlock_rdunlock(&page_index->lock); @@ -1117,7 +1159,7 @@ pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index if (!(flags & RRD_PAGE_POPULATED)) page_not_in_cache = 1; - if (pg_cache_timedwait_event_unsafe(descr, 1) == UV_ETIMEDOUT) { + if (pg_cache_timedwait_event_unsafe(descr, default_rrdeng_page_fetch_timeout) == UV_ETIMEDOUT) { error_report("Page cache timeout while waiting for page %p : retry count = %d", descr, retry_count); ++retry_count; } @@ -1196,24 +1238,66 @@ void init_page_cache(struct rrdengine_instance *ctx) init_committed_page_index(ctx); } + + +/* + * METRIC # number + * 1. INDEX: JudyHS # bytes + * 2. DATA: page_index # bytes + * + * PAGE (1 page of 1 metric) # number + * 1. INDEX AT METRIC: page_index->JudyL_array # bytes + * 2. DATA: descr # bytes + * + * PAGE CACHE (1 page of 1 metric at the cache) # number + * 1. pg_cache_descr (if PG_CACHE_DESCR_ALLOCATED) # bytes + * 2. data (if RRD_PAGE_POPULATED) # bytes + * + */ + + void free_page_cache(struct rrdengine_instance *ctx) { struct page_cache *pg_cache = &ctx->pg_cache; - Word_t ret_Judy, bytes_freed = 0; Pvoid_t *PValue; struct pg_cache_page_index *page_index, *prev_page_index; Word_t Index; struct rrdeng_page_descr *descr; struct page_cache_descr *pg_cache_descr; + Word_t metrics_number = 0, + metrics_bytes = 0, + metrics_index_bytes = 0, + metrics_duration = 0; + + Word_t pages_number = 0, + pages_bytes = 0, + pages_index_bytes = 0; + + Word_t pages_size_per_type[256] = { 0 }, + pages_count_per_type[256] = { 0 }; + + Word_t cache_pages_number = 0, + cache_pages_bytes = 0, + cache_pages_data_bytes = 0; + + size_t points_in_db = 0, + uncompressed_points_size = 0, + seconds_in_db = 0, + single_point_pages = 0; + + Word_t pages_dirty_index_bytes = 0; + + usec_t oldest_time_ut = LONG_MAX, latest_time_ut = 0; + /* Free committed page index */ - ret_Judy = JudyLFreeArray(&pg_cache->committed_page_index.JudyL_array, PJE0); + pages_dirty_index_bytes = JudyLFreeArray(&pg_cache->committed_page_index.JudyL_array, PJE0); fatal_assert(NULL == pg_cache->committed_page_index.JudyL_array); - bytes_freed += ret_Judy; for (page_index = pg_cache->metrics_index.last_page_index ; page_index != NULL ; page_index = prev_page_index) { + prev_page_index = page_index->prev; /* Find first page in range */ @@ -1221,37 +1305,116 @@ void free_page_cache(struct rrdengine_instance *ctx) PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0); descr = unlikely(NULL == PValue) ? NULL : *PValue; + size_t metric_duration = 0; + size_t metric_update_every = 0; + size_t metric_single_point_pages = 0; + while (descr != NULL) { /* Iterate all page descriptors of this metric */ if (descr->pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED) { + cache_pages_number++; + /* Check rrdenglocking.c */ pg_cache_descr = descr->pg_cache_descr; if (pg_cache_descr->flags & RRD_PAGE_POPULATED) { dbengine_page_free(pg_cache_descr->page); - bytes_freed += RRDENG_BLOCK_SIZE; + cache_pages_data_bytes += RRDENG_BLOCK_SIZE; } rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); - bytes_freed += sizeof(*pg_cache_descr); + cache_pages_bytes += sizeof(*pg_cache_descr); } - freez(descr); - bytes_freed += sizeof(*descr); + + if(descr->start_time < oldest_time_ut) + oldest_time_ut = descr->start_time; + + if(descr->end_time > latest_time_ut) + latest_time_ut = descr->end_time; + + pages_size_per_type[descr->type] += descr->page_length; + pages_count_per_type[descr->type]++; + + size_t points_in_page = (descr->page_length / PAGE_POINT_SIZE_BYTES(descr)); + size_t page_duration = ((descr->end_time - descr->start_time) / USEC_PER_SEC); + size_t update_every = (page_duration == 0) ? 1 : page_duration / (points_in_page - 1); + + if (!page_duration && metric_update_every) { + page_duration = metric_update_every; + update_every = metric_update_every; + } + else if(page_duration) + metric_update_every = update_every; + + uncompressed_points_size += descr->page_length; + + if(page_duration > 0) { + page_duration = update_every * points_in_page; + metric_duration += page_duration; + seconds_in_db += page_duration; + points_in_db += descr->page_length / PAGE_POINT_SIZE_BYTES(descr); + } + else + metric_single_point_pages++; + + rrdeng_page_descr_freez(descr); + pages_bytes += sizeof(*descr); + pages_number++; PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0); descr = unlikely(NULL == PValue) ? NULL : *PValue; } + if(metric_single_point_pages && metric_update_every) { + points_in_db += metric_single_point_pages; + seconds_in_db += metric_update_every * metric_single_point_pages; + metric_duration += metric_update_every * metric_single_point_pages; + } + else + single_point_pages += metric_single_point_pages; + /* Free page index */ - ret_Judy = JudyLFreeArray(&page_index->JudyL_array, PJE0); + pages_index_bytes += JudyLFreeArray(&page_index->JudyL_array, PJE0); fatal_assert(NULL == page_index->JudyL_array); - bytes_freed += ret_Judy; freez(page_index); - bytes_freed += sizeof(*page_index); + + metrics_number++; + metrics_bytes += sizeof(*page_index); + metrics_duration += metric_duration; } /* Free metrics index */ - ret_Judy = JudyHSFreeArray(&pg_cache->metrics_index.JudyHS_array, PJE0); + metrics_index_bytes = JudyHSFreeArray(&pg_cache->metrics_index.JudyHS_array, PJE0); fatal_assert(NULL == pg_cache->metrics_index.JudyHS_array); - bytes_freed += ret_Judy; - info("Freed %lu bytes of memory from page cache.", bytes_freed); + if(!metrics_number) metrics_number = 1; + if(!pages_number) pages_number = 1; + if(!cache_pages_number) cache_pages_number = 1; + if(!points_in_db) points_in_db = 1; + if(latest_time_ut == oldest_time_ut) oldest_time_ut -= USEC_PER_SEC; + + if(single_point_pages) { + long double avg_duration = (long double)seconds_in_db / points_in_db; + points_in_db += single_point_pages; + seconds_in_db += (size_t)(avg_duration * single_point_pages); + } + + info("DBENGINE STATISTICS ON METRICS:" + " Metrics: %lu (structures %lu bytes - per metric %0.2f, index (HS) %lu bytes - per metric %0.2f bytes - duration %zu secs) |" + " Page descriptors: %lu (structures %lu bytes - per page %0.2f bytes, index (L) %lu bytes - per page %0.2f, dirty index %lu bytes). |" + " Page cache: %lu pages (structures %lu bytes - per page %0.2f bytes, data %lu bytes). |" + " Points in db %zu, uncompressed size of points database %zu bytes. |" + " Duration of all points %zu seconds, average point duration %0.2f seconds." + " Duration of the database %llu seconds, average metric duration %0.2f seconds, average metric lifetime %0.2f%%." + , metrics_number, metrics_bytes, (double)metrics_bytes/metrics_number, metrics_index_bytes, (double)metrics_index_bytes/metrics_number, metrics_duration + , pages_number, pages_bytes, (double)pages_bytes/pages_number, pages_index_bytes, (double)pages_index_bytes/pages_number, pages_dirty_index_bytes + , cache_pages_number, cache_pages_bytes, (double)cache_pages_bytes/cache_pages_number, cache_pages_data_bytes + , points_in_db, uncompressed_points_size + , seconds_in_db, (double)seconds_in_db/points_in_db + , (latest_time_ut - oldest_time_ut) / USEC_PER_SEC, (double)metrics_duration/metrics_number + , (double)metrics_duration/metrics_number * 100.0 / ((latest_time_ut - oldest_time_ut) / USEC_PER_SEC) + ); + + for(int i = 0; i < 256 ;i++) { + if(pages_count_per_type[i]) + info("DBENGINE STATISTICS ON PAGE TYPES: page type %d total pages %lu, average page size %0.2f bytes", i, pages_count_per_type[i], (double)pages_size_per_type[i]/pages_count_per_type[i]); + } } -- cgit v1.2.3