diff options
Diffstat (limited to '')
-rw-r--r-- | database/engine/Makefile.in | 514 | ||||
-rw-r--r-- | database/engine/README.md | 42 | ||||
-rw-r--r-- | database/engine/journalfile.c | 9 | ||||
-rw-r--r-- | database/engine/pagecache.c | 284 | ||||
-rw-r--r-- | database/engine/pagecache.h | 33 | ||||
-rw-r--r-- | database/engine/rrdengine.c | 3 | ||||
-rw-r--r-- | database/engine/rrdengineapi.c | 298 | ||||
-rw-r--r-- | database/engine/rrdengineapi.h | 11 | ||||
-rw-r--r-- | database/engine/rrdenginelib.h | 2 |
9 files changed, 1097 insertions, 99 deletions
diff --git a/database/engine/Makefile.in b/database/engine/Makefile.in new file mode 100644 index 000000000..862693d94 --- /dev/null +++ b/database/engine/Makefile.in @@ -0,0 +1,514 @@ +# Makefile.in generated by automake 1.15.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2017 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# SPDX-License-Identifier: GPL-3.0-or-later + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = database/engine +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/build/m4/ax_c___atomic.m4 \ + $(top_srcdir)/build/m4/ax_c__generic.m4 \ + $(top_srcdir)/build/m4/ax_c_lto.m4 \ + $(top_srcdir)/build/m4/ax_c_mallinfo.m4 \ + $(top_srcdir)/build/m4/ax_c_mallopt.m4 \ + $(top_srcdir)/build/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/build/m4/ax_gcc_func_attribute.m4 \ + $(top_srcdir)/build/m4/ax_pthread.m4 \ + $(top_srcdir)/build/m4/jemalloc.m4 \ + $(top_srcdir)/build/m4/tcmalloc.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(dist_noinst_DATA) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +DATA = $(dist_noinst_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CUPSCONFIG = @CUPSCONFIG@ +CXX = @CXX@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CXX_BINARY = @CXX_BINARY@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +IPMIMONITORING_CFLAGS = @IPMIMONITORING_CFLAGS@ +IPMIMONITORING_LIBS = @IPMIMONITORING_LIBS@ +JSON_CFLAGS = @JSON_CFLAGS@ +JSON_LIBS = @JSON_LIBS@ +LDFLAGS = @LDFLAGS@ +LIBCAP_CFLAGS = @LIBCAP_CFLAGS@ +LIBCAP_LIBS = @LIBCAP_LIBS@ +LIBCRYPTO_CFLAGS = @LIBCRYPTO_CFLAGS@ +LIBCRYPTO_LIBS = @LIBCRYPTO_LIBS@ +LIBCURL_CFLAGS = @LIBCURL_CFLAGS@ +LIBCURL_LIBS = @LIBCURL_LIBS@ +LIBMNL_CFLAGS = @LIBMNL_CFLAGS@ +LIBMNL_LIBS = @LIBMNL_LIBS@ +LIBMONGOC_CFLAGS = @LIBMONGOC_CFLAGS@ +LIBMONGOC_LIBS = @LIBMONGOC_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSSL_CFLAGS = @LIBSSL_CFLAGS@ +LIBSSL_LIBS = @LIBSSL_LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MATH_CFLAGS = @MATH_CFLAGS@ +MATH_LIBS = @MATH_LIBS@ +MKDIR_P = @MKDIR_P@ +NFACCT_CFLAGS = @NFACCT_CFLAGS@ +NFACCT_LIBS = @NFACCT_LIBS@ +OBJEXT = @OBJEXT@ +OPTIONAL_CUPS_CFLAGS = @OPTIONAL_CUPS_CFLAGS@ +OPTIONAL_CUPS_LIBS = @OPTIONAL_CUPS_LIBS@ +OPTIONAL_IPMIMONITORING_CFLAGS = @OPTIONAL_IPMIMONITORING_CFLAGS@ +OPTIONAL_IPMIMONITORING_LIBS = @OPTIONAL_IPMIMONITORING_LIBS@ +OPTIONAL_JSONC_LIBS = @OPTIONAL_JSONC_LIBS@ +OPTIONAL_JUDY_LIBS = @OPTIONAL_JUDY_LIBS@ +OPTIONAL_KINESIS_CFLAGS = @OPTIONAL_KINESIS_CFLAGS@ +OPTIONAL_KINESIS_LIBS = @OPTIONAL_KINESIS_LIBS@ +OPTIONAL_LIBCAP_CFLAGS = @OPTIONAL_LIBCAP_CFLAGS@ +OPTIONAL_LIBCAP_LIBS = @OPTIONAL_LIBCAP_LIBS@ +OPTIONAL_LZ4_LIBS = @OPTIONAL_LZ4_LIBS@ +OPTIONAL_MATH_CFLAGS = @OPTIONAL_MATH_CFLAGS@ +OPTIONAL_MATH_LIBS = @OPTIONAL_MATH_LIBS@ +OPTIONAL_MONGOC_CFLAGS = @OPTIONAL_MONGOC_CFLAGS@ +OPTIONAL_MONGOC_LIBS = @OPTIONAL_MONGOC_LIBS@ +OPTIONAL_NFACCT_CFLAGS = @OPTIONAL_NFACCT_CFLAGS@ +OPTIONAL_NFACCT_LIBS = @OPTIONAL_NFACCT_LIBS@ +OPTIONAL_PROMETHEUS_REMOTE_WRITE_CFLAGS = @OPTIONAL_PROMETHEUS_REMOTE_WRITE_CFLAGS@ +OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS = @OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS@ +OPTIONAL_SSL_LIBS = @OPTIONAL_SSL_LIBS@ +OPTIONAL_UUID_CFLAGS = @OPTIONAL_UUID_CFLAGS@ +OPTIONAL_UUID_LIBS = @OPTIONAL_UUID_LIBS@ +OPTIONAL_UV_LIBS = @OPTIONAL_UV_LIBS@ +OPTIONAL_XENSTAT_CFLAGS = @OPTIONAL_XENSTAT_CFLAGS@ +OPTIONAL_XENSTAT_LIBS = @OPTIONAL_XENSTAT_LIBS@ +OPTIONAL_ZLIB_CFLAGS = @OPTIONAL_ZLIB_CFLAGS@ +OPTIONAL_ZLIB_LIBS = @OPTIONAL_ZLIB_LIBS@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_RPM_VERSION = @PACKAGE_RPM_VERSION@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROTOBUF_CFLAGS = @PROTOBUF_CFLAGS@ +PROTOBUF_LIBS = @PROTOBUF_LIBS@ +PROTOC = @PROTOC@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SSE_CANDIDATE = @SSE_CANDIDATE@ +STRIP = @STRIP@ +UUID_CFLAGS = @UUID_CFLAGS@ +UUID_LIBS = @UUID_LIBS@ +VERSION = @VERSION@ +XENLIGHT_CFLAGS = @XENLIGHT_CFLAGS@ +XENLIGHT_LIBS = @XENLIGHT_LIBS@ +YAJL_CFLAGS = @YAJL_CFLAGS@ +YAJL_LIBS = @YAJL_LIBS@ +ZLIB_CFLAGS = @ZLIB_CFLAGS@ +ZLIB_LIBS = @ZLIB_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_target = @build_target@ +build_vendor = @build_vendor@ +builddir = @builddir@ +cachedir = @cachedir@ +chartsdir = @chartsdir@ +configdir = @configdir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +has_jemalloc = @has_jemalloc@ +has_tcmalloc = @has_tcmalloc@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libconfigdir = @libconfigdir@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +logdir = @logdir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +nodedir = @nodedir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pluginsdir = @pluginsdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +pythondir = @pythondir@ +registrydir = @registrydir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +varlibdir = @varlibdir@ +webdir = @webdir@ +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in +dist_noinst_DATA = \ + README.md \ + $(NULL) + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu database/engine/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu database/engine/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +clean: clean-am + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic cscopelist-am \ + ctags-am distclean distclean-generic distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ + pdf-am ps ps-am tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/database/engine/README.md b/database/engine/README.md index adc69ffd7..7791a549f 100644 --- a/database/engine/README.md +++ b/database/engine/README.md @@ -4,6 +4,8 @@ The Database Engine works like a traditional database. There is some amount of RAM dedicated to data caching and indexing and the rest of the data reside compressed on disk. The number of history entries is not fixed in this case, but depends on the configured disk space and the effective compression ratio of the data stored. +This is the **only mode** that supports changing the data collection update frequency +(`update_every`) **without losing** the previously stored metrics. ## Files @@ -18,18 +20,18 @@ journalfile-1-0000000002.njf datafile-1-0000000003.ndf journalfile-1-0000000003.njf ... -``` +``` They are located under their host's cache directory in the directory `./dbengine` (e.g. for localhost the default location is `/var/cache/netdata/dbengine/*`). The higher numbered filenames contain more recent metric data. The user can safely delete some pairs -of files when netdata is stopped to manually free up some space. +of files when Netdata is stopped to manually free up some space. -*Users should* **back up** *their `./dbengine` folders if they consider this data to be important.* +_Users should_ **back up** _their `./dbengine` folders if they consider this data to be important._ ## Configuration -There is one DB engine instance per netdata host/node. That is, there is one `./dbengine` folder +There is one DB engine instance per Netdata host/node. That is, there is one `./dbengine` folder per node, and all charts of `dbengine` memory mode in such a host share the same storage space and DB engine instance memory state. You can select the memory mode for localhost by editing netdata.conf and setting: @@ -59,10 +61,10 @@ quota. Both numbers are in **MiB**. All DB engine instances will allocate the co separately. The `page cache size` option determines the amount of RAM in **MiB** that is dedicated to caching -netdata metric values themselves. +Netdata metric values themselves. The `dbengine disk space` option determines the amount of disk space in **MiB** that is dedicated -to storing netdata metric values and all related metadata describing them. +to storing Netdata metric values and all related metadata describing them. ## Operation @@ -72,7 +74,7 @@ the **Page Cache**. When those pages fill up they are slowly compressed and flushed to disk. It can take `4096 / 4 = 1024 seconds = 17 minutes`, for a chart dimension that is being collected -every 1 second, to fill a page. Pages can be cut short when we stop netdata or the DB engine +every 1 second, to fill a page. Pages can be cut short when we stop Netdata or the DB engine instance so as to not lose the data. When we query the DB engine for data we trigger disk read I/O requests that fill the Page Cache with the requested pages and potentially evict cold (not recently used) pages. @@ -91,17 +93,17 @@ applications. Using memory mode `dbengine` we can overcome most memory restrictions and store a dataset that is much larger than the available memory. -There are explicit memory requirements **per** DB engine **instance**, meaning **per** netdata +There are explicit memory requirements **per** DB engine **instance**, meaning **per** Netdata **node** (e.g. localhost and streaming recipient nodes): -- `page cache size` must be at least `#dimensions-being-collected x 4096 x 2` bytes. +- `page cache size` must be at least `#dimensions-being-collected x 4096 x 2` bytes. -- an additional `#pages-on-disk x 4096 x 0.03` bytes of RAM are allocated for metadata. +- an additional `#pages-on-disk x 4096 x 0.03` bytes of RAM are allocated for metadata. - - roughly speaking this is 3% of the uncompressed disk space taken by the DB files. + - roughly speaking this is 3% of the uncompressed disk space taken by the DB files. - - for very highly compressible data (compression ratio > 90%) this RAM overhead - is comparable to the disk space footprint. + - for very highly compressible data (compression ratio > 90%) this RAM overhead + is comparable to the disk space footprint. An important observation is that RAM usage depends on both the `page cache size` and the `dbengine disk space` options. @@ -115,11 +117,11 @@ file descriptors available per `dbengine` instance. Netdata allocates 25% of the available file descriptors to its Database Engine instances. This means that only 25% of the file descriptors that are available to the Netdata service are accessible by dbengine instances. You should take that into account when configuring your service -or system-wide file descriptor limits. You can roughly estimate that the netdata service needs 2048 file +or system-wide file descriptor limits. You can roughly estimate that the Netdata service needs 2048 file descriptors for every 10 streaming slave hosts when streaming is configured to use `memory mode = dbengine`. -If for example one wants to allocate 65536 file descriptors to the netdata service on a systemd system -one needs to override the netdata service by running `sudo systemctl edit netdata` and creating a +If for example one wants to allocate 65536 file descriptors to the Netdata service on a systemd system +one needs to override the Netdata service by running `sudo systemctl edit netdata` and creating a file with contents: ``` @@ -128,18 +130,24 @@ LimitNOFILE=65536 ``` For other types of services one can add the line: + ``` ulimit -n 65536 ``` + at the beginning of the service file. Alternatively you can change the system-wide limits of the kernel by changing `/etc/sysctl.conf`. For linux that would be: + ``` fs.file-max = 65536 ``` + In FreeBSD and OS X you change the lines like this: + ``` kern.maxfilesperproc=65536 kern.maxfiles=65536 ``` + You can apply the settings by running `sysctl -p` or by rebooting. -[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdatabase%2Fengine%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() +[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdatabase%2Fengine%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) diff --git a/database/engine/journalfile.c b/database/engine/journalfile.c index d6e4f3174..fac680aa0 100644 --- a/database/engine/journalfile.c +++ b/database/engine/journalfile.c @@ -269,7 +269,6 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden extent = mallocz(sizeof(*extent) + count * sizeof(extent->pages[0])); extent->offset = jf_metric_data->extent_offset; extent->size = jf_metric_data->extent_size; - extent->number_of_pages = count; extent->datafile = journalfile->datafile; extent->next = NULL; @@ -282,7 +281,6 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden error("Unknown page type encountered."); continue; } - ++valid_pages; temp_id = (uuid_t *)jf_metric_data->descr[i].uuid; uv_rwlock_rdlock(&pg_cache->metrics_index.lock); @@ -308,11 +306,16 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden descr->end_time = jf_metric_data->descr[i].end_time; descr->id = &page_index->id; descr->extent = extent; - extent->pages[i] = descr; + extent->pages[valid_pages++] = descr; pg_cache_insert(ctx, page_index, descr); } + + extent->number_of_pages = valid_pages; + if (likely(valid_pages)) df_extent_insert(extent); + else + freez(extent); } /* diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c index 124f2448b..1bd4c9436 100644 --- a/database/engine/pagecache.c +++ b/database/engine/pagecache.c @@ -419,6 +419,35 @@ static inline int is_point_in_time_in_page(struct rrdeng_page_descr *descr, usec return (point_in_time >= descr->start_time && point_in_time <= descr->end_time); } +/* The caller must hold the page index lock */ +static inline struct rrdeng_page_descr * + find_first_page_in_time_range(struct pg_cache_page_index *page_index, usec_t start_time, usec_t end_time) +{ + struct rrdeng_page_descr *descr = NULL; + Pvoid_t *PValue; + Word_t Index; + + Index = (Word_t)(start_time / USEC_PER_SEC); + PValue = JudyLLast(page_index->JudyL_array, &Index, PJE0); + if (likely(NULL != PValue)) { + descr = *PValue; + if (is_page_in_time_range(descr, start_time, end_time)) { + return descr; + } + } + + Index = (Word_t)(start_time / USEC_PER_SEC); + PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0); + if (likely(NULL != PValue)) { + descr = *PValue; + if (is_page_in_time_range(descr, start_time, end_time)) { + return descr; + } + } + + return NULL; +} + /* Update metric oldest and latest timestamps efficiently when adding new values */ void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_descr *descr) { @@ -510,70 +539,144 @@ void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); } -/* - * Searches for a page and triggers disk I/O if necessary and possible. +usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = NULL; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index; + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + page_index = *PValue; + } + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + if (NULL == PValue) { + return INVALID_TIME; + } + + uv_rwlock_rdlock(&page_index->lock); + descr = find_first_page_in_time_range(page_index, start_time, end_time); + if (NULL == descr) { + uv_rwlock_rdunlock(&page_index->lock); + return INVALID_TIME; + } + uv_rwlock_rdunlock(&page_index->lock); + return descr->start_time; +} + +/** + * Return page information for the first page before point_in_time that satisfies the filter. + * @param ctx DB context + * @param page_index page index of a metric + * @param point_in_time the pages that are searched must be older than this timestamp + * @param filter decides if the page satisfies the caller's criteria + * @param page_info the result of the search is set in this pointer + */ +void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_cache_page_index *page_index, + usec_t point_in_time, pg_cache_page_info_filter_t *filter, + struct rrdeng_page_info *page_info) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = NULL; + Pvoid_t *PValue; + Word_t Index; + + (void)pg_cache; + assert(NULL != page_index); + + Index = (Word_t)(point_in_time / USEC_PER_SEC); + uv_rwlock_rdlock(&page_index->lock); + do { + PValue = JudyLPrev(page_index->JudyL_array, &Index, PJE0); + descr = unlikely(NULL == PValue) ? NULL : *PValue; + } while (descr != NULL && !filter(descr)); + if (unlikely(NULL == descr)) { + page_info->page_length = 0; + page_info->start_time = INVALID_TIME; + page_info->end_time = INVALID_TIME; + } else { + page_info->page_length = descr->page_length; + page_info->start_time = descr->start_time; + page_info->end_time = descr->end_time; + } + uv_rwlock_rdunlock(&page_index->lock); +} +/** + * Searches for pages in a time range and triggers disk I/O if necessary and possible. * Does not get a reference. - * Returns page index pointer for given metric UUID. + * @param ctx DB context + * @param id UUID + * @param start_time inclusive starting time in usec + * @param end_time inclusive ending time in usec + * @param page_info_arrayp It allocates (*page_arrayp) and populates it with information of pages that overlap + * with the time range [start_time,end_time]. The caller must free (*page_info_arrayp) with freez(). + * If page_info_arrayp is set to NULL nothing was allocated. + * @param ret_page_indexp Sets the page index pointer (*ret_page_indexp) for the given UUID. + * @return the number of pages that overlap with the time range [start_time,end_time]. */ -struct pg_cache_page_index * - pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time) +unsigned pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time, + struct rrdeng_page_info **page_info_arrayp, struct pg_cache_page_index **ret_page_indexp) { struct page_cache *pg_cache = &ctx->pg_cache; struct rrdeng_page_descr *descr = NULL, *preload_array[PAGE_CACHE_MAX_PRELOAD_PAGES]; struct page_cache_descr *pg_cache_descr = NULL; - int i, j, k, count, found; + unsigned i, j, k, preload_count, count, page_info_array_max_size; unsigned long flags; Pvoid_t *PValue; struct pg_cache_page_index *page_index; Word_t Index; uint8_t failed_to_reserve; + assert(NULL != ret_page_indexp); + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t)); if (likely(NULL != PValue)) { - page_index = *PValue; + *ret_page_indexp = page_index = *PValue; } uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); if (NULL == PValue) { debug(D_RRDENGINE, "%s: No page was found to attempt preload.", __func__); - return NULL; + *ret_page_indexp = NULL; + return 0; } uv_rwlock_rdlock(&page_index->lock); - /* Find first page in range */ - found = 0; - Index = (Word_t)(start_time / USEC_PER_SEC); - PValue = JudyLLast(page_index->JudyL_array, &Index, PJE0); - if (likely(NULL != PValue)) { - descr = *PValue; - if (is_page_in_time_range(descr, start_time, end_time)) { - found = 1; - } - } - if (!found) { - Index = (Word_t)(start_time / USEC_PER_SEC); - PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0); - if (likely(NULL != PValue)) { - descr = *PValue; - if (is_page_in_time_range(descr, start_time, end_time)) { - found = 1; - } - } - } - if (!found) { + descr = find_first_page_in_time_range(page_index, start_time, end_time); + if (NULL == descr) { uv_rwlock_rdunlock(&page_index->lock); debug(D_RRDENGINE, "%s: No page was found to attempt preload.", __func__); - return page_index; + *ret_page_indexp = NULL; + return 0; + } else { + Index = (Word_t)(descr->start_time / USEC_PER_SEC); + } + if (page_info_arrayp) { + page_info_array_max_size = PAGE_CACHE_MAX_PRELOAD_PAGES * sizeof(struct rrdeng_page_info); + *page_info_arrayp = mallocz(page_info_array_max_size); } - for (count = 0 ; - descr != NULL && is_page_in_time_range(descr, start_time, end_time); + for (count = 0, preload_count = 0 ; + descr != NULL && is_page_in_time_range(descr, start_time, end_time) ; PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0), descr = unlikely(NULL == PValue) ? NULL : *PValue) { /* Iterate all pages in range */ if (unlikely(0 == descr->page_length)) continue; + if (page_info_arrayp) { + if (unlikely(count >= page_info_array_max_size / sizeof(struct rrdeng_page_info))) { + page_info_array_max_size += PAGE_CACHE_MAX_PRELOAD_PAGES * sizeof(struct rrdeng_page_info); + *page_info_arrayp = reallocz(*page_info_arrayp, page_info_array_max_size); + } + (*page_info_arrayp)[count].start_time = descr->start_time; + (*page_info_arrayp)[count].end_time = descr->end_time; + (*page_info_arrayp)[count].page_length = descr->page_length; + } + ++count; + rrdeng_page_descr_mutex_lock(ctx, descr); pg_cache_descr = descr->pg_cache_descr; flags = pg_cache_descr->flags; @@ -586,8 +689,8 @@ struct pg_cache_page_index * } } if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) { - preload_array[count++] = descr; - if (PAGE_CACHE_MAX_PRELOAD_PAGES == count) { + preload_array[preload_count++] = descr; + if (PAGE_CACHE_MAX_PRELOAD_PAGES == preload_count) { rrdeng_page_descr_mutex_unlock(ctx, descr); break; } @@ -598,7 +701,7 @@ struct pg_cache_page_index * uv_rwlock_rdunlock(&page_index->lock); failed_to_reserve = 0; - for (i = 0 ; i < count && !failed_to_reserve ; ++i) { + for (i = 0 ; i < preload_count && !failed_to_reserve ; ++i) { struct rrdeng_cmd cmd; struct rrdeng_page_descr *next; @@ -614,7 +717,7 @@ struct pg_cache_page_index * cmd.read_extent.page_cache_descr[0] = descr; /* don't use this page again */ preload_array[i] = NULL; - for (j = 0, k = 1 ; j < count ; ++j) { + for (j = 0, k = 1 ; j < preload_count ; ++j) { next = preload_array[j]; if (NULL == next) { continue; @@ -635,7 +738,7 @@ struct pg_cache_page_index * } if (failed_to_reserve) { debug(D_RRDENGINE, "%s: Failed to reserve enough memory, canceling I/O.", __func__); - for (i = 0 ; i < count ; ++i) { + for (i = 0 ; i < preload_count ; ++i) { descr = preload_array[i]; if (NULL == descr) { continue; @@ -643,11 +746,15 @@ struct pg_cache_page_index * pg_cache_put(ctx, descr); } } - if (!count) { + if (!preload_count) { /* no such page */ debug(D_RRDENGINE, "%s: No page was eligible to attempt preload.", __func__); } - return page_index; + if (unlikely(0 == count && page_info_arrayp)) { + freez(*page_info_arrayp); + *page_info_arrayp = NULL; + } + return count; } /* @@ -757,6 +864,105 @@ struct rrdeng_page_descr * return descr; } +/* + * Searches for the first page between start_time and end_time and gets a reference. + * start_time and end_time are inclusive. + * If index is NULL lookup by UUID (id). + */ +struct rrdeng_page_descr * +pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, + usec_t start_time, usec_t end_time) +{ + struct page_cache *pg_cache = &ctx->pg_cache; + struct rrdeng_page_descr *descr = NULL; + struct page_cache_descr *pg_cache_descr = NULL; + unsigned long flags; + Pvoid_t *PValue; + struct pg_cache_page_index *page_index; + uint8_t page_not_in_cache; + + if (unlikely(NULL == index)) { + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + page_index = *PValue; + } + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); + if (NULL == PValue) { + return NULL; + } + } else { + page_index = index; + } + pg_cache_reserve_pages(ctx, 1); + + page_not_in_cache = 0; + uv_rwlock_rdlock(&page_index->lock); + while (1) { + descr = find_first_page_in_time_range(page_index, start_time, end_time); + if (NULL == descr || 0 == descr->page_length) { + /* non-empty page not found */ + uv_rwlock_rdunlock(&page_index->lock); + + pg_cache_release_pages(ctx, 1); + return NULL; + } + rrdeng_page_descr_mutex_lock(ctx, descr); + pg_cache_descr = descr->pg_cache_descr; + flags = pg_cache_descr->flags; + if ((flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 0)) { + /* success */ + rrdeng_page_descr_mutex_unlock(ctx, descr); + debug(D_RRDENGINE, "%s: Page was found in memory.", __func__); + break; + } + if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) { + struct rrdeng_cmd cmd; + + uv_rwlock_rdunlock(&page_index->lock); + + cmd.opcode = RRDENG_READ_PAGE; + cmd.read_page.page_cache_descr = descr; + rrdeng_enq_cmd(&ctx->worker_config, &cmd); + + debug(D_RRDENGINE, "%s: Waiting for page to be asynchronously read from disk:", __func__); + if(unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr); + while (!(pg_cache_descr->flags & RRD_PAGE_POPULATED)) { + pg_cache_wait_event_unsafe(descr); + } + /* success */ + /* Downgrade exclusive reference to allow other readers */ + pg_cache_descr->flags &= ~RRD_PAGE_LOCKED; + pg_cache_wake_up_waiters_unsafe(descr); + rrdeng_page_descr_mutex_unlock(ctx, descr); + rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1); + return descr; + } + uv_rwlock_rdunlock(&page_index->lock); + debug(D_RRDENGINE, "%s: Waiting for page to be unlocked:", __func__); + if(unlikely(debug_flags & D_RRDENGINE)) + print_page_cache_descr(descr); + if (!(flags & RRD_PAGE_POPULATED)) + page_not_in_cache = 1; + pg_cache_wait_event_unsafe(descr); + rrdeng_page_descr_mutex_unlock(ctx, descr); + + /* reset scan to find again */ + uv_rwlock_rdlock(&page_index->lock); + } + uv_rwlock_rdunlock(&page_index->lock); + + if (!(flags & RRD_PAGE_DIRTY)) + pg_cache_replaceQ_set_hot(ctx, descr); + pg_cache_release_pages(ctx, 1); + if (page_not_in_cache) + rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1); + else + rrd_stat_atomic_add(&ctx->stats.pg_cache_hits, 1); + return descr; +} + struct pg_cache_page_index *create_page_index(uuid_t *id) { struct pg_cache_page_index *page_index; diff --git a/database/engine/pagecache.h b/database/engine/pagecache.h index b5670f82a..d464211e9 100644 --- a/database/engine/pagecache.h +++ b/database/engine/pagecache.h @@ -48,9 +48,6 @@ struct page_cache_descr { * number of descriptor users | DESTROY | LOCKED | ALLOCATED | */ struct rrdeng_page_descr { - uint32_t page_length; - usec_t start_time; - usec_t end_time; uuid_t *id; /* never changes */ struct extent_info *extent; @@ -59,8 +56,25 @@ struct rrdeng_page_descr { /* Compare-And-Swap target for page cache descriptor allocation algorithm */ volatile unsigned long pg_cache_descr_state; + + /* page information */ + usec_t start_time; + usec_t end_time; + uint32_t page_length; }; +#define PAGE_INFO_SCRATCH_SZ (8) +struct rrdeng_page_info { + uint8_t scratch[PAGE_INFO_SCRATCH_SZ]; /* scratch area to be used by page-cache users */ + + usec_t start_time; + usec_t end_time; + uint32_t page_length; +}; + +/* returns 1 for success, 0 for failure */ +typedef int pg_cache_page_info_filter_t(struct rrdeng_page_descr *); + #define PAGE_CACHE_MAX_PRELOAD_PAGES (256) /* maps time ranges to pages */ @@ -149,11 +163,20 @@ extern void pg_cache_put(struct rrdengine_instance *ctx, struct rrdeng_page_desc extern void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, struct rrdeng_page_descr *descr); extern void pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, uint8_t remove_dirty); -extern struct pg_cache_page_index * - pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time); +extern usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, + usec_t start_time, usec_t end_time); +extern void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_cache_page_index *page_index, + usec_t point_in_time, pg_cache_page_info_filter_t *filter, + struct rrdeng_page_info *page_info); +extern unsigned + pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time, + struct rrdeng_page_info **page_info_arrayp, struct pg_cache_page_index **ret_page_indexp); extern struct rrdeng_page_descr * pg_cache_lookup(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, usec_t point_in_time); +extern struct rrdeng_page_descr * + pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, + usec_t start_time, usec_t end_time); extern struct pg_cache_page_index *create_page_index(uuid_t *id); extern void init_page_cache(struct rrdengine_instance *ctx); extern void free_page_cache(struct rrdengine_instance *ctx); diff --git a/database/engine/rrdengine.c b/database/engine/rrdengine.c index 221216bb3..39d148578 100644 --- a/database/engine/rrdengine.c +++ b/database/engine/rrdengine.c @@ -24,6 +24,9 @@ void sanity_check(void) /* page count must fit in 8 bits */ BUILD_BUG_ON(MAX_PAGES_PER_EXTENT > 255); + + /* page info scratch space must be able to hold 2 32-bit integers */ + BUILD_BUG_ON(sizeof(((struct rrdeng_page_info *)0)->scratch) < 2 * sizeof(uint32_t)); } void read_extent_cb(uv_fs_t* req) diff --git a/database/engine/rrdengineapi.c b/database/engine/rrdengineapi.c index a87ce6d64..bf373f31c 100644 --- a/database/engine/rrdengineapi.c +++ b/database/engine/rrdengineapi.c @@ -218,6 +218,208 @@ void rrdeng_store_metric_finalize(RRDDIM *rd) } } +/* Returns 1 if the data collection interval is well defined, 0 otherwise */ +static int metrics_with_known_interval(struct rrdeng_page_descr *descr) +{ + unsigned page_entries; + + if (unlikely(INVALID_TIME == descr->start_time || INVALID_TIME == descr->end_time)) + return 0; + page_entries = descr->page_length / sizeof(storage_number); + if (likely(page_entries > 1)) { + return 1; + } + return 0; +} + +static inline uint32_t *pginfo_to_dt(struct rrdeng_page_info *page_info) +{ + return (uint32_t *)&page_info->scratch[0]; +} + +static inline uint32_t *pginfo_to_points(struct rrdeng_page_info *page_info) +{ + return (uint32_t *)&page_info->scratch[sizeof(uint32_t)]; +} + +/** + * Calculates the regions of different data collection intervals in a netdata chart in the time range + * [start_time,end_time]. This call takes the netdata chart read lock. + * @param st the netdata chart whose data collection interval boundaries are calculated. + * @param start_time inclusive starting time in usec + * @param end_time inclusive ending time in usec + * @param region_info_arrayp It allocates (*region_info_arrayp) and populates it with information of regions of a + * reference dimension that that have different data collection intervals and overlap with the time range + * [start_time,end_time]. The caller must free (*region_info_arrayp) with freez(). If region_info_arrayp is set + * to NULL nothing was allocated. + * @param max_intervalp is derefenced and set to be the largest data collection interval of all regions. + * @return number of regions with different data collection intervals. + */ +unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t end_time, + struct rrdeng_region_info **region_info_arrayp, unsigned *max_intervalp) +{ + struct pg_cache_page_index *page_index; + struct rrdengine_instance *ctx; + unsigned pages_nr; + RRDDIM *rd_iter, *rd; + struct rrdeng_page_info *page_info_array, *curr, *prev, *old_prev; + unsigned i, j, page_entries, region_points, page_points, regions, max_interval; + time_t now; + usec_t dt, current_position_time, max_time = 0, min_time, curr_time, first_valid_time_in_page; + struct rrdeng_region_info *region_info_array; + uint8_t is_first_region_initialized; + + ctx = st->rrdhost->rrdeng_ctx; + regions = 1; + *max_intervalp = max_interval = 0; + region_info_array = NULL; + *region_info_arrayp = NULL; + page_info_array = NULL; + + rrdset_rdlock(st); + for(rd_iter = st->dimensions, rd = NULL, min_time = (usec_t)-1 ; rd_iter ; rd_iter = rd_iter->next) { + /* + * Choose oldest dimension as reference. This is not equivalent to the union of all dimensions + * but it is a best effort approximation with a bias towards older metrics in a chart. It + * matches netdata behaviour in the sense that dimensions are generally aligned in a chart + * and older dimensions contain more information about the time range. It does not work well + * for metrics that have recently stopped being collected. + */ + curr_time = pg_cache_oldest_time_in_range(ctx, rd_iter->state->rrdeng_uuid, + start_time * USEC_PER_SEC, end_time * USEC_PER_SEC); + if (INVALID_TIME != curr_time && curr_time < min_time) { + rd = rd_iter; + min_time = curr_time; + } + } + rrdset_unlock(st); + if (NULL == rd) { + return 1; + } + pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC, + &page_info_array, &page_index); + if (pages_nr) { + /* conservative allocation, will reduce the size later if necessary */ + region_info_array = mallocz(sizeof(*region_info_array) * pages_nr); + } + is_first_region_initialized = 0; + region_points = 0; + + /* pages loop */ + for (i = 0, curr = NULL, prev = NULL ; i < pages_nr ; ++i) { + old_prev = prev; + prev = curr; + curr = &page_info_array[i]; + *pginfo_to_points(curr) = 0; /* initialize to invalid page */ + *pginfo_to_dt(curr) = 0; /* no known data collection interval yet */ + if (unlikely(INVALID_TIME == curr->start_time || INVALID_TIME == curr->end_time)) { + info("Ignoring page with invalid timestamp."); + prev = old_prev; + continue; + } + page_entries = curr->page_length / sizeof(storage_number); + assert(0 != page_entries); + if (likely(1 != page_entries)) { + dt = (curr->end_time - curr->start_time) / (page_entries - 1); + *pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(dt); + if (unlikely(0 == *pginfo_to_dt(curr))) + *pginfo_to_dt(curr) = 1; + } else { + dt = 0; + } + for (j = 0, page_points = 0 ; j < page_entries ; ++j) { + uint8_t is_metric_out_of_order, is_metric_earlier_than_range; + + is_metric_earlier_than_range = 0; + is_metric_out_of_order = 0; + + current_position_time = curr->start_time + j * dt; + now = current_position_time / USEC_PER_SEC; + if (now > end_time) { /* there will be no more pages in the time range */ + break; + } + if (now < start_time) + is_metric_earlier_than_range = 1; + if (unlikely(current_position_time < max_time)) /* just went back in time */ + is_metric_out_of_order = 1; + if (is_metric_earlier_than_range || unlikely(is_metric_out_of_order)) { + if (unlikely(is_metric_out_of_order)) + info("Ignoring metric with out of order timestamp."); + continue; /* next entry */ + } + /* here is a valid metric */ + ++page_points; + region_info_array[regions - 1].points = ++region_points; + max_time = current_position_time; + if (1 == page_points) + first_valid_time_in_page = current_position_time; + if (unlikely(!is_first_region_initialized)) { + assert(1 == regions); + /* this is the first region */ + region_info_array[0].start_time = current_position_time; + is_first_region_initialized = 1; + } + } + *pginfo_to_points(curr) = page_points; + if (0 == page_points) { + prev = old_prev; + continue; + } + + if (unlikely(0 == dt)) { /* unknown data collection interval */ + assert(1 == page_points); + + if (likely(NULL != prev)) { /* get interval from previous page */ + *pginfo_to_dt(curr) = *pginfo_to_dt(prev); + } else { /* there is no previous page in the query */ + struct rrdeng_page_info db_page_info; + + /* go to database */ + pg_cache_get_filtered_info_prev(ctx, page_index, curr->start_time, + metrics_with_known_interval, &db_page_info); + if (unlikely(db_page_info.start_time == INVALID_TIME || db_page_info.end_time == INVALID_TIME || + 0 == db_page_info.page_length)) { /* nothing in the database, default to update_every */ + *pginfo_to_dt(curr) = rd->update_every; + } else { + unsigned db_entries; + usec_t db_dt; + + db_entries = db_page_info.page_length / sizeof(storage_number); + db_dt = (db_page_info.end_time - db_page_info.start_time) / (db_entries - 1); + *pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(db_dt); + if (unlikely(0 == *pginfo_to_dt(curr))) + *pginfo_to_dt(curr) = 1; + + } + } + } + if (likely(prev) && unlikely(*pginfo_to_dt(curr) != *pginfo_to_dt(prev))) { + info("Data collection interval change detected in query: %"PRIu32" -> %"PRIu32, + *pginfo_to_dt(prev), *pginfo_to_dt(curr)); + region_info_array[regions++ - 1].points -= page_points; + region_info_array[regions - 1].points = region_points = page_points; + region_info_array[regions - 1].start_time = first_valid_time_in_page; + } + if (*pginfo_to_dt(curr) > max_interval) + max_interval = *pginfo_to_dt(curr); + region_info_array[regions - 1].update_every = *pginfo_to_dt(curr); + } + if (page_info_array) + freez(page_info_array); + if (region_info_array) { + if (likely(is_first_region_initialized)) { + /* free unnecessary memory */ + region_info_array = reallocz(region_info_array, sizeof(*region_info_array) * regions); + *region_info_arrayp = region_info_array; + *max_intervalp = max_interval; + } else { + /* empty result */ + freez(region_info_array); + } + } + return regions; +} + /* * Gets a handle for loading metrics from the database. * The handle must be released with rrdeng_load_metric_final(). @@ -226,80 +428,108 @@ void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_hand { struct rrdeng_query_handle *handle; struct rrdengine_instance *ctx; + unsigned pages_nr; ctx = rd->rrdset->rrdhost->rrdeng_ctx; rrdimm_handle->start_time = start_time; rrdimm_handle->end_time = end_time; handle = &rrdimm_handle->rrdeng; + handle->next_page_time = start_time; handle->now = start_time; - handle->dt = rd->rrdset->update_every; + handle->position = 0; handle->ctx = ctx; handle->descr = NULL; - handle->page_index = pg_cache_preload(ctx, rd->state->rrdeng_uuid, - start_time * USEC_PER_SEC, end_time * USEC_PER_SEC); + pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC, + NULL, &handle->page_index); + if (unlikely(NULL == handle->page_index || 0 == pages_nr)) + /* there are no metrics to load */ + handle->next_page_time = INVALID_TIME; } -storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle) +/* Returns the metric and sets its timestamp into current_time */ +storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *current_time) { struct rrdeng_query_handle *handle; struct rrdengine_instance *ctx; struct rrdeng_page_descr *descr; storage_number *page, ret; - unsigned position; - usec_t point_in_time; + unsigned position, entries; + usec_t next_page_time, current_position_time; handle = &rrdimm_handle->rrdeng; - if (unlikely(INVALID_TIME == handle->now)) { + if (unlikely(INVALID_TIME == handle->next_page_time)) { return SN_EMPTY_SLOT; } ctx = handle->ctx; - point_in_time = handle->now * USEC_PER_SEC; - descr = handle->descr; - - if (unlikely(NULL == handle->page_index)) { - ret = SN_EMPTY_SLOT; - goto out; + if (unlikely(NULL == (descr = handle->descr))) { + /* it's the first call */ + next_page_time = handle->next_page_time * USEC_PER_SEC; } + position = handle->position + 1; + if (unlikely(NULL == descr || - point_in_time < descr->start_time || - point_in_time > descr->end_time)) { + position >= (descr->page_length / sizeof(storage_number)))) { + /* We need to get a new page */ if (descr) { + /* Drop old page's reference */ + handle->next_page_time = (descr->end_time / USEC_PER_SEC) + 1; + if (unlikely(handle->next_page_time > rrdimm_handle->end_time)) { + goto no_more_metrics; + } + next_page_time = handle->next_page_time * USEC_PER_SEC; #ifdef NETDATA_INTERNAL_CHECKS rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1); #endif pg_cache_put(ctx, descr); handle->descr = NULL; } - descr = pg_cache_lookup(ctx, handle->page_index, &handle->page_index->id, point_in_time); + descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id, + next_page_time, rrdimm_handle->end_time * USEC_PER_SEC); if (NULL == descr) { - ret = SN_EMPTY_SLOT; - goto out; + goto no_more_metrics; } #ifdef NETDATA_INTERNAL_CHECKS rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, 1); #endif handle->descr = descr; - } - if (unlikely(INVALID_TIME == descr->start_time || - INVALID_TIME == descr->end_time)) { - ret = SN_EMPTY_SLOT; - goto out; + if (unlikely(INVALID_TIME == descr->start_time || + INVALID_TIME == descr->end_time)) { + goto no_more_metrics; + } + if (unlikely(descr->start_time != descr->end_time && next_page_time > descr->start_time)) { + /* we're in the middle of the page somewhere */ + entries = descr->page_length / sizeof(storage_number); + position = ((uint64_t)(next_page_time - descr->start_time)) * entries / + (descr->end_time - descr->start_time + 1); + } else { + position = 0; + } } page = descr->pg_cache_descr->page; - if (unlikely(descr->start_time == descr->end_time)) { - ret = page[0]; - goto out; - } - position = ((uint64_t)(point_in_time - descr->start_time)) * (descr->page_length / sizeof(storage_number)) / - (descr->end_time - descr->start_time + 1); ret = page[position]; + entries = descr->page_length / sizeof(storage_number); + if (entries > 1) { + usec_t dt; -out: - handle->now += handle->dt; - if (unlikely(handle->now > rrdimm_handle->end_time)) { - handle->now = INVALID_TIME; + dt = (descr->end_time - descr->start_time) / (entries - 1); + current_position_time = descr->start_time + position * dt; + } else { + current_position_time = descr->start_time; } + handle->position = position; + handle->now = current_position_time / USEC_PER_SEC; +/* assert(handle->now >= rrdimm_handle->start_time && handle->now <= rrdimm_handle->end_time); + The above assertion is an approximation and needs to take update_every into account */ + if (unlikely(handle->now >= rrdimm_handle->end_time)) { + /* next calls will not load any more metrics */ + handle->next_page_time = INVALID_TIME; + } + *current_time = handle->now; return ret; + +no_more_metrics: + handle->next_page_time = INVALID_TIME; + return SN_EMPTY_SLOT; } int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle) @@ -307,7 +537,7 @@ int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle) struct rrdeng_query_handle *handle; handle = &rrdimm_handle->rrdeng; - return (INVALID_TIME == handle->now); + return (INVALID_TIME == handle->next_page_time); } /* diff --git a/database/engine/rrdengineapi.h b/database/engine/rrdengineapi.h index e52aabcbe..9b1ab1874 100644 --- a/database/engine/rrdengineapi.h +++ b/database/engine/rrdengineapi.h @@ -15,6 +15,12 @@ extern int default_rrdeng_page_cache_mb; extern int default_rrdeng_disk_quota_mb; +struct rrdeng_region_info { + time_t start_time; + int update_every; + unsigned points; +}; + extern void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrdeng_page_descr **ret_descr); extern void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, Word_t page_correlation_id); @@ -25,9 +31,12 @@ extern void rrdeng_store_metric_init(RRDDIM *rd); extern void rrdeng_store_metric_flush_current_page(RRDDIM *rd); extern void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, storage_number number); extern void rrdeng_store_metric_finalize(RRDDIM *rd); +extern unsigned + rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t end_time, + struct rrdeng_region_info **region_info_arrayp, unsigned *max_intervalp); extern void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_handle, time_t start_time, time_t end_time); -extern storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle); +extern storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *current_time); extern int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle); extern void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle); extern time_t rrdeng_metric_latest_time(RRDDIM *rd); diff --git a/database/engine/rrdenginelib.h b/database/engine/rrdenginelib.h index 36d414e89..5685b65a5 100644 --- a/database/engine/rrdenginelib.h +++ b/database/engine/rrdenginelib.h @@ -23,6 +23,8 @@ struct rrdeng_page_descr; #define ALIGN_BYTES_FLOOR(x) (((x) / RRDENG_BLOCK_SIZE) * RRDENG_BLOCK_SIZE) #define ALIGN_BYTES_CEILING(x) ((((x) + RRDENG_BLOCK_SIZE - 1) / RRDENG_BLOCK_SIZE) * RRDENG_BLOCK_SIZE) +#define ROUND_USEC_TO_SEC(x) (((x) + USEC_PER_SEC / 2 - 1) / USEC_PER_SEC) + typedef uintptr_t rrdeng_stats_t; #ifdef __ATOMIC_RELAXED |